Import Data¶

In [ ]:
import requests
from pyspark.sql import SparkSession
import pyspark.pandas as ps
import matplotlib.pyplot as plt
import pprint
from pyspark.sql.functions import from_unixtime, col, mean, stddev, abs as pyspark_abs
import pandas as pd
from pyspark.sql.functions import col, count, split, size, length, when, isnan, regexp_extract, expr
ps.set_option('plotting.backend', 'matplotlib')
from pyspark.sql import Row
from pyspark.sql.types import StructType, StructField, StringType
import re

# Initialize Spark session
spark = SparkSession.builder.appName("ReadCSVFile").getOrCreate()

# Define the presigned URL
local_file_path = 'dbfs:/FileStore/files/FinalClean_100K.csv'

# Load the local CSV file into a pandas-on-Spark DataFrame
df = ps.read_csv(local_file_path, sep=";")

# Show the first few rows of the DataFrame
df.head()
Out[ ]:
category title body amenities bathrooms bedrooms currency fee has_photo pets_allowed price price_type square_feet cityname state latitude longitude source time week_of_month has_Tennis has_Parking has_Alarm has_Golf has_TV has_Clubhouse has_Playground has_Refrigerator has_Cable_or_Satellite has_Unknown has_Gated has_Pool has_Wood_Floors has_Internet_Access has_View has_Elevator has_Hot_Tub has_Gym has_Storage has_Doorman has_Dishwasher has_Washer_Dryer has_Patio/Deck has_Garbage_Disposal has_Luxury has_AC has_Fireplace has_photo_no has_photo_yes pets_allowed_None pets_allowed_Yes
0 housing/rent Excellent home with 2 bdrooms, loft, and 1 BA.... Every room has a ceiling fan in it, along with... Alarm,Fireplace,Refrigerator 1.0 3 USD No Yes Cats,Dogs 800.0 Monthly 2500.0 Kansas City MO 39.0342 -94.5429 RentDigs.com 2019-02-22 07:39:28 4 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1
1 housing/rent/apartment $1,000 / Two BR - Great Deal. MUST SEE. Cat OK! Spacious two beds apartment in historic Bowers... AC,Cable or Satellite,Dishwasher,Garbage Dispo... 2.0 2 USD No Yes Cats 1000.0 Monthly 875.0 Richmond VA 37.5423 -77.4347 RentDigs.com 2019-02-22 09:43:14 4 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 0 1
2 housing/rent/apartment $1,017 / One BR - Great Deal. MUST SEE! Square footage: 780 square feet, unit number: ... AC,Dishwasher,Fireplace,Gated,Gym,Patio/Deck,P... 1.0 1 USD No Thumbnail Cats,Dogs 1017.0 Monthly 780.0 Lawrenceville GA 33.9222 -84.0725 RentDigs.com 2019-09-18 03:09:41 3 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 1 1 1 0 0 1 1 0 0 0 1
3 housing/rent/apartment $1,023 / Two BR - Great Deal. MUST SEE. Pet OK! Come home and enjoy all the luxuries you Fores... Parking,Pool,Washer Dryer 2.0 2 USD No Yes Cats,Dogs 1023.0 Monthly 1115.0 Bahama NC 36.1599 -78.8975 RentDigs.com 2019-09-17 21:51:57 3 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1
4 housing/rent/apartment $1,025/mo \ Two BA \ Apartment - convenient lo... Square footage: 1300 square ft, unit number: 5... Pool 1.5 2 USD No Yes Cats,Dogs 1025.0 Monthly 1300.0 Tampa FL 28.0395 -82.3952 RentDigs.com 2019-02-22 09:26:04 4 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1
In [ ]:
display(df.columns)
Index(['category', 'title', 'body', 'amenities', 'bathrooms', 'bedrooms',
       'currency', 'fee', 'has_photo', 'pets_allowed', 'price', 'price_type',
       'square_feet', 'cityname', 'state', 'latitude', 'longitude', 'source',
       'time', 'week_of_month', 'has_Tennis', 'has_Parking', 'has_Alarm',
       'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
       'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
       'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
       'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
       'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
       'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
       'has_photo_no', 'has_photo_yes', 'pets_allowed_None',
       'pets_allowed_Yes'],
      dtype='object')
In [ ]:
included_numeric_types= ['float64', 'int64', 'float32','int32','int64','int8']
numeric_df = df.select_dtypes(include=included_numeric_types)
numeric_df.columns
Out[ ]:
Index(['bathrooms', 'bedrooms', 'price', 'square_feet', 'latitude',
       'longitude', 'week_of_month', 'has_Tennis', 'has_Parking', 'has_Alarm',
       'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
       'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
       'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
       'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
       'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
       'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
       'has_photo_no', 'has_photo_yes', 'pets_allowed_None',
       'pets_allowed_Yes'],
      dtype='object')
In [ ]:
numeric_df.head()
Out[ ]:
bathrooms bedrooms price square_feet latitude longitude week_of_month has_Tennis has_Parking has_Alarm has_Golf has_TV has_Clubhouse has_Playground has_Refrigerator has_Cable_or_Satellite has_Unknown has_Gated has_Pool has_Wood_Floors has_Internet_Access has_View has_Elevator has_Hot_Tub has_Gym has_Storage has_Doorman has_Dishwasher has_Washer_Dryer has_Patio/Deck has_Garbage_Disposal has_Luxury has_AC has_Fireplace has_photo_no has_photo_yes pets_allowed_None pets_allowed_Yes
6056 2.0 2 1007.0 955.0 35.7551 -78.7199 3 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 1
6057 1.0 1 1009.0 780.0 36.1106 -79.7406 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1
6058 1.0 2 1010.0 1075.0 36.8379 -76.0939 3 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1
6059 1.0 1 1010.0 659.0 39.1627 -76.6354 3 0 0 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 1
6060 2.0 2 1011.0 1000.0 33.9743 -84.2384 2 1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1
In [ ]:
print(numeric_df.columns)
Index(['bathrooms', 'bedrooms', 'price', 'square_feet', 'latitude',
       'longitude', 'week_of_month', 'has_Tennis', 'has_Parking', 'has_Alarm',
       'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
       'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
       'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
       'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
       'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
       'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
       'has_photo_no', 'has_photo_yes', 'pets_allowed_None',
       'pets_allowed_Yes'],
      dtype='object')
In [ ]:
#remove spatial outliers. 
numeric_df = numeric_df[numeric_df['longitude'] >= -130]
numeric_df = numeric_df.drop(columns='pets_allowed_None')
In [ ]:
numeric_df.head()
Out[ ]:
bathrooms bedrooms price square_feet latitude longitude week_of_month has_Tennis has_Parking has_Alarm has_Golf has_TV has_Clubhouse has_Playground has_Refrigerator has_Cable_or_Satellite has_Unknown has_Gated has_Pool has_Wood_Floors has_Internet_Access has_View has_Elevator has_Hot_Tub has_Gym has_Storage has_Doorman has_Dishwasher has_Washer_Dryer has_Patio/Deck has_Garbage_Disposal has_Luxury has_AC has_Fireplace has_photo_no has_photo_yes pets_allowed_Yes
0 1.0 3 800.0 2500.0 39.0342 -94.5429 4 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1
1 2.0 2 1000.0 875.0 37.5423 -77.4347 4 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1
2 1.0 1 1017.0 780.0 33.9222 -84.0725 3 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1
3 2.0 2 1023.0 1115.0 36.1599 -78.8975 3 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1
4 1.5 2 1025.0 1300.0 28.0395 -82.3952 4 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
In [ ]:
import pyspark.pandas as ps

# Assuming numeric_df is your Spark on Pandas DataFrame
# Select only the columns you want
columns_to_include = ['bathrooms', 'bedrooms', 'price', 'square_feet', 'latitude', 'longitude']
selected_df = numeric_df[columns_to_include]

# Generate the description
description = selected_df.describe()

# Function to format to 2 decimal places
def format_to_2_decimals(x):
    try:
        return f"{float(x):.2f}"
    except (ValueError, TypeError):
        return x

# Apply the formatting function to all cells except the 'count' row
formatted_description = description.apply(lambda x: x.map(format_to_2_decimals) if x.name != 'count' else x)

# Display the result
display(formatted_description)
bathroomsbedroomspricesquare_feetlatitudelongitude
99517.0099517.0099517.0099517.0099517.0099517.00
1.441.731525.54956.0536.93-91.49
0.550.75902.06387.224.5615.74
0.000.00100.00101.0024.56-124.23
1.001.001014.00730.0033.75-104.79
1.002.001350.00900.0037.21-84.54
2.002.001795.001115.0039.96-77.57
9.009.0052500.0040000.0048.85-68.78
In [ ]:
numeric_df.describe()
Out[ ]:
bathrooms bedrooms price square_feet latitude longitude week_of_month has_Tennis has_Parking has_Alarm has_Golf has_TV has_Clubhouse has_Playground has_Refrigerator has_Cable_or_Satellite has_Unknown has_Gated has_Pool has_Wood_Floors has_Internet_Access has_View has_Elevator has_Hot_Tub has_Gym has_Storage has_Doorman has_Dishwasher has_Washer_Dryer has_Patio/Deck has_Garbage_Disposal has_Luxury has_AC has_Fireplace has_photo_no has_photo_yes pets_allowed_Yes
count 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.00000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000
mean 1.444628 1.725745 1525.544319 956.051479 36.934093 -91.487287 3.161992 0.085664 0.441040 0.003658 0.000271 0.045279 0.192389 0.113930 0.149874 0.126029 0.159953 0.087171 0.438227 0.089251 0.111549 0.021042 0.043601 0.040124 0.375896 0.217923 0.002191 0.166776 0.262096 0.266879 0.038938 0.00208 0.159159 0.150316 0.092748 0.562818 0.949516
std 0.547771 0.750469 902.055852 387.219284 4.560699 15.737561 0.834220 0.279868 0.496514 0.060368 0.016469 0.207916 0.394179 0.317728 0.356950 0.331883 0.366564 0.282087 0.496172 0.285107 0.314812 0.143524 0.204206 0.196251 0.484356 0.412837 0.046753 0.372777 0.439777 0.442331 0.193448 0.04556 0.365826 0.357383 0.290080 0.496041 0.218942
min 0.000000 0.000000 100.000000 101.000000 24.564500 -124.226500 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 1.000000 1.000000 1014.000000 730.000000 33.746500 -104.791900 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 1.000000
50% 1.000000 2.000000 1350.000000 900.000000 37.213900 -84.538200 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 1.000000 1.000000
75% 2.000000 2.000000 1795.000000 1115.000000 39.955900 -77.569900 4.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000 0.00000 0.000000 0.000000 0.000000 1.000000 1.000000
max 9.000000 9.000000 52500.000000 40000.000000 48.846700 -68.778800 5.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 1.000000 1.000000 1.000000 1.000000 1.000000

Correlation Matrix¶

Our data may have a lot of highly correlated columns so we need to perform a bit of feature reduction to decrease multicollinearity. We start with a correlation matrix heatmap to view columns with correlations higher than .6

We removed highly correlated and constant columns in our data preprocessing but we may need to continue removing columns. Currently, bedroom, bathrooms, and square footage is highly correlated which makes sense. We may not be able to untangle this easily, we will perform VIF analysis to see if there are any very highly correlated columns which we can edit there.

In [ ]:
from pyspark.ml.stat import Correlation
from pyspark.ml.feature import VectorAssembler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

numeric_column_names = numeric_df.columns.tolist()
# Convert pandas-on-Spark DataFrame to PySpark DataFrame
df_spark = numeric_df.to_spark()

# Convert to vector column
vector_col = "corr_features"
assembler = VectorAssembler(inputCols=df_spark.columns, outputCol=vector_col)
df_vector = assembler.transform(df_spark).select(vector_col)

# Calculate correlation matrix
matrix = Correlation.corr(df_vector, vector_col)

# Convert to numpy array
corr_matrix = matrix.collect()[0]["pearson({})".format(vector_col)].toArray()

# Create a heatmap
plt.figure(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0,
            xticklabels=numeric_column_names, yticklabels=numeric_column_names)
plt.title('Correlation Matrix')
plt.tight_layout()
No description has been provided for this image

We must remove the has_unknown column to decrease ambiguity of the features. Same with allows_pets_unknown. These were added as the NaN values and we filled them with 'unknown'

In [ ]:
from pyspark.ml.stat import Correlation
from pyspark.ml.feature import VectorAssembler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

def compute_correlation_matrix(df, method='pearson', threshold=0.6):

    numeric_columns = df.columns.tolist()

    # Convert pandas-on-Spark DataFrame to PySpark DataFrame
    df_spark = df[numeric_columns].to_spark()

    # Create a vector column
    vector_col = "features"
    assembler = VectorAssembler(inputCols=numeric_columns, outputCol=vector_col)
    df_vector = assembler.transform(df_spark)

    # Cache the dataset
    df_vector.cache()

    # Calculate correlation matrix
    correlation = Correlation.corr(df_vector, vector_col, method)

    # Extract the correlation matrix
    correlation_matrix = correlation.collect()[0][0].toArray()

    # Unpersist the cached data
    df_vector.unpersist()

    return correlation_matrix, numeric_columns

def plot_correlation_heatmap(correlation_matrix, column_names, threshold=0.6, title='Correlation Matrix'):
    # Create a mask for correlations below the threshold (in absolute value)
    mask = np.abs(correlation_matrix) <= threshold

    # Set diagonal to False to always show self-correlations
    np.fill_diagonal(mask, False)

    # Calculate figure size based on number of columns
    n_cols = len(column_names)
    fig_size = (n_cols * 0.8 + 2, n_cols * 0.8 + 2)  # Adjust these multipliers as needed

    # Create a heatmap
    plt.figure(figsize=fig_size)
    sns.heatmap(correlation_matrix, 
                mask=mask,
                annot=True, 
                cmap='coolwarm', 
                vmin=-1, 
                vmax=1, 
                center=0,
                square=True, 
                linewidths=0.5, 
                fmt='.2f',
                xticklabels=column_names,
                yticklabels=column_names,
                annot_kws={"size": 16},  # Adjust text size as needed
                cbar_kws={"shrink": .8})  # Adjust colorbar size

    plt.title(f"{title} (|r| > {threshold})")
    plt.xticks(rotation=75)
    plt.yticks(rotation=0)
    # plt.gcf()
    
    # If you're in a Databricks notebook, use display() instead of plt.show()
    return plt.tight_layout()

# Compute correlation matrix (you can change 'pearson' to 'spearman' if needed)
correlation_matrix, column_names = compute_correlation_matrix(numeric_df, method='pearson')

# Plot the heatmap (you can adjust the threshold here)
plot_correlation_heatmap(correlation_matrix, column_names, threshold=0.6)
No description has been provided for this image
In [ ]:
def plot_correlation_heatmap_WithFilter(correlation_matrix, column_names, threshold=0.6, title='Correlation Matrix'):
    # Create a mask for correlations below the threshold (in absolute value)
    mask = np.abs(correlation_matrix) <= threshold

    # Set diagonal to True to exclude self-correlations from consideration
    np.fill_diagonal(mask, True)

    # Find columns with at least one correlation above the threshold
    columns_to_keep = ~mask.all(axis=0)
    
    # If no columns meet the criteria, return early
    if not np.any(columns_to_keep):
        print("No correlations above the threshold were found.")
        return None

    # Filter the correlation matrix and column names
    filtered_matrix = correlation_matrix[columns_to_keep][:, columns_to_keep]
    filtered_column_names = [column_names[i] for i in range(len(column_names)) if columns_to_keep[i]]

    # Recreate the mask for the filtered matrix
    filtered_mask = np.abs(filtered_matrix) <= threshold
    np.fill_diagonal(filtered_mask, False)  # Show self-correlations in the plot

    # Calculate figure size based on number of columns
    n_cols = len(filtered_column_names)
    fig_size = (n_cols * 0.8 + 2, n_cols * 0.8 + 2)  # Adjust these multipliers as needed

    # Create a heatmap
    plt.figure(figsize=fig_size)
    sns.heatmap(filtered_matrix, 
                mask=filtered_mask,
                annot=True, 
                cmap='coolwarm', 
                vmin=-1, 
                vmax=1, 
                center=0,
                square=True, 
                linewidths=0.5, 
                fmt='.2f',
                xticklabels=filtered_column_names,
                yticklabels=filtered_column_names,
                annot_kws={"size": 16},  # Adjust text size as needed
                cbar_kws={"shrink": .8})  # Adjust colorbar size

    plt.title(f"{title} (|r| > {threshold})")
    plt.xticks(rotation=45, ha="right")
    plt.yticks(rotation=0)
    
    return plt.tight_layout()

    
# Plot the heatmap (you can adjust the threshold here)
plot_correlation_heatmap_WithFilter(correlation_matrix, column_names, threshold=0.6)
No description has been provided for this image
In [ ]:
 
# Compute correlation matrix
corr_matrix = numeric_df.corr()

# Apply threshold
high_corr = corr_matrix.where(np.abs(corr_matrix) > .6)
display(high_corr)
bathroomsbedroomspricesquare_feetlatitudelongitudeweek_of_monthhas_Tennishas_Parkinghas_Alarmhas_Golfhas_TVhas_Clubhousehas_Playgroundhas_Refrigeratorhas_Cable_or_Satellitehas_Unknownhas_Gatedhas_Poolhas_Wood_Floorshas_Internet_Accesshas_Viewhas_Elevatorhas_Hot_Tubhas_Gymhas_Storagehas_Doormanhas_Dishwasherhas_Washer_Dryerhas_Patio/Deckhas_Garbage_Disposalhas_Luxuryhas_AChas_Fireplacehas_photo_nohas_photo_yespets_allowed_Yes
1.00.6785507254992077NaN0.6716269592610624NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
0.67855072549920771.0NaN0.6615681672898076NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
0.67162695926106240.6615681672898076NaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.6624100094559625NaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.6624100094559625NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaN
NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0

Calulating VIF for Entire Dataset¶

This is looking at the entire dataset to see if we have any very highly correlated columns. We see in the correlation matrix we have correlated columns but VIF will help us see a little more about how they may interact. Usually a VIF over 5 is too high and needs to be removed.

In [ ]:
type(numeric_df)
Out[ ]:
pyspark.pandas.frame.DataFrame
In [ ]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression
from pyspark.ml.evaluation import RegressionEvaluator
import pyspark.pandas as ps
from concurrent.futures import ThreadPoolExecutor, as_completed

# Ensure Spark session is created
spark = SparkSession.builder \
    .appName("VIF Calculation") \
    .config("spark.sql.shuffle.partitions", "200") \
    .getOrCreate()

# Assuming numeric_df is already defined as a pandas-on-Spark DataFrame
# Converting pandas-on-Spark DataFrame to a Spark DataFrame
vector_df = numeric_df.to_spark()

# List to store VIF results
vif_results = []

# Repartition the DataFrame based on its size
num_partitions = vector_df.rdd.getNumPartitions()
optimal_partitions = min(num_partitions, 200)
vector_df = vector_df.repartition(optimal_partitions)

# Function to calculate VIF for a single column
def calculate_vif(target_col, df):
    feature_cols = [col for col in vector_df.columns if col not in [target_col, 'unique_id']]    
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    assembled_df = assembler.transform(df)
    train_data = assembled_df.select("features", target_col).cache()

    lr = LinearRegression(featuresCol="features", labelCol=target_col)
    lr_model = lr.fit(train_data)
    predictions = lr_model.transform(train_data)

    evaluator = RegressionEvaluator(predictionCol="prediction", labelCol=target_col, metricName="r2")
    r_sq = evaluator.evaluate(predictions)
    vif = 1 / (1 - r_sq) if r_sq != 1 else float('inf')
    
    return (target_col, vif)

# Using ThreadPoolExecutor for concurrent execution
with ThreadPoolExecutor(max_workers=8) as executor:  # Adjust the number of workers as needed
    futures = {executor.submit(calculate_vif, col, vector_df): col for col in vector_df.columns}
    for future in as_completed(futures):
        col = futures[future]
        try:
            vif = future.result()
            vif_results.append(vif)
            # print(f"VIF for {col}: {vif[1]}")
        except Exception as e:
            print(f"Error calculating VIF for {col}: {e}")

# Print VIF results at the end
print("\nSummary of VIF for all columns:")

# Sort the vif_results list by VIF value in descending order
sorted_vif_results = sorted(vif_results, key=lambda x: x[1], reverse=True)

# Print the sorted results
for col_name, vif_value in sorted_vif_results:
    print(f"VIF for {col_name}: {vif_value}")

# for col_name, vif_value in vif_results:
#     print(f"VIF for {col_name}: {vif_value}")
Downloading artifacts:   0%|          | 0/15 [00:00<?, ?it/s]
Uploading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]
Summary of VIF for all columns:
VIF for bathrooms: 2.3624851836755787
VIF for square_feet: 2.339454459038783
VIF for bedrooms: 2.2664234690126075
VIF for has_Dishwasher: 2.1810783161091045
VIF for has_Refrigerator: 1.9591729080835734
VIF for has_Cable_or_Satellite: 1.6179120486024876
VIF for has_Gym: 1.6033882016075538
VIF for has_Unknown: 1.5758371809301963
VIF for has_Pool: 1.5703902700609371
VIF for has_photo_yes: 1.4836796573204838
VIF for has_AC: 1.4530923893716492
VIF for has_Washer_Dryer: 1.3539197127525244
VIF for has_Internet_Access: 1.3451229162250355
VIF for has_Clubhouse: 1.3295707004312385
VIF for price: 1.3242504302753955
VIF for has_Patio/Deck: 1.307685706804794
VIF for week_of_month: 1.2931466480414795
VIF for has_Parking: 1.2819032492905715
VIF for has_Garbage_Disposal: 1.2579875998447216
VIF for has_Fireplace: 1.2163191988166653
VIF for has_Playground: 1.1939382684898312
VIF for has_photo_no: 1.1842839427331735
VIF for has_Gated: 1.1659203164426049
VIF for has_Elevator: 1.1624553131603872
VIF for has_Storage: 1.1564350130457361
VIF for longitude: 1.1187435796303882
VIF for has_Tennis: 1.1164598198761602
VIF for has_Hot_Tub: 1.0935891982712334
VIF for latitude: 1.0873040328865666
VIF for has_TV: 1.0768832665888608
VIF for has_Wood_Floors: 1.0634335456520498
VIF for pets_allowed_Yes: 1.0523330027747977
VIF for has_View: 1.0257949576035088
VIF for has_Alarm: 1.01582526351512
VIF for has_Doorman: 1.0122277512867157
VIF for has_Luxury: 1.0091488084528268
VIF for has_Golf: 1.0054774736471004

We have good VIF scores in our dataset so we will not remove any more columns. If there are moderately correlated columns we will take that into account in our analysis later on.

Summary Stats on dataset¶

In [ ]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, mean, stddev, min as spark_min, max as spark_max
import pyspark.pandas as ps
from concurrent.futures import ThreadPoolExecutor, as_completed

# Ensure Spark session is created
spark = SparkSession.builder \
    .appName("Summary Statistics Calculation") \
    .config("spark.sql.shuffle.partitions", "200") \
    .getOrCreate()

# Assuming numeric_df is already defined as a pandas-on-Spark DataFrame
# Converting pandas-on-Spark DataFrame to a Spark DataFrame
vector_df = numeric_df.to_spark()

# List to store summary statistics results
summary_results = []

# Repartition the DataFrame based on its size
num_partitions = vector_df.rdd.getNumPartitions()
optimal_partitions = __builtins__.min(num_partitions, 200) 
vector_df = vector_df.repartition(optimal_partitions)

# Function to calculate summary statistics for a single column
def calculate_summary_stats(column_name, df):
    summary_stats = df.select(
        mean(col(column_name)).alias("mean"),
        stddev(col(column_name)).alias("stddev"),
        spark_min(col(column_name)).alias("min"),
        spark_max(col(column_name)).alias("max")
    ).collect()[0]
    
    summary = {
        "column": column_name,
        "mean": summary_stats["mean"],
        "stddev": summary_stats["stddev"],
        "min": summary_stats["min"],
        "max": summary_stats["max"]
    }
    return summary

# Using ThreadPoolExecutor for concurrent execution
with ThreadPoolExecutor(max_workers=8) as executor:  # Adjust the number of workers as needed
    futures = {executor.submit(calculate_summary_stats, col_name, vector_df): col_name for col_name in vector_df.columns}
    for future in as_completed(futures):
        col_name = futures[future]
        try:
            summary = future.result()
            summary_results.append(summary)
            print(f"Summary statistics for {col_name}: {summary}")
        except Exception as e:
            print(f"Error calculating summary statistics for {col_name}: {e}")

# Print summary statistics results at the end
print("\nSummary Statistics for all columns:")
for result in summary_results:
    print(result)
Summary statistics for bedrooms: {'column': 'bedrooms', 'mean': 1.7257453500406965, 'stddev': 0.7504689092373312, 'min': 0, 'max': 9}
Summary statistics for week_of_month: {'column': 'week_of_month', 'mean': 3.1619924234050463, 'stddev': 0.8342202772785027, 'min': 1, 'max': 5}
Summary statistics for bathrooms: {'column': 'bathrooms', 'mean': 1.4446275510716762, 'stddev': 0.5477713573635926, 'min': 0.0, 'max': 9.0}
Summary statistics for has_Tennis: {'column': 'has_Tennis', 'mean': 0.08566375594119598, 'stddev': 0.27986829746043457, 'min': 0, 'max': 1}
Summary statistics for square_feet: {'column': 'square_feet', 'mean': 956.05147864184, 'stddev': 387.2192844112672, 'min': 101.0, 'max': 40000.0}
Summary statistics for price: {'column': 'price', 'mean': 1525.5443189605796, 'stddev': 902.0558524870771, 'min': 100.0, 'max': 52500.0}
Summary statistics for longitude: {'column': 'longitude', 'mean': -91.48728659826962, 'stddev': 15.737561003856273, 'min': -124.2265, 'max': -68.7788}
Summary statistics for latitude: {'column': 'latitude', 'mean': 36.934092578152516, 'stddev': 4.560699440151283, 'min': 24.5645, 'max': 48.8467}
Summary statistics for has_Parking: {'column': 'has_Parking', 'mean': 0.44104022428328826, 'stddev': 0.4965140703692728, 'min': 0, 'max': 1}
Summary statistics for has_Alarm: {'column': 'has_Alarm', 'mean': 0.0036576665293366964, 'stddev': 0.06036824185793644, 'min': 0, 'max': 1}
Summary statistics for has_TV: {'column': 'has_TV', 'mean': 0.0452786961021735, 'stddev': 0.20791577662422658, 'min': 0, 'max': 1}
Summary statistics for has_Golf: {'column': 'has_Golf', 'mean': 0.00027131042937387584, 'stddev': 0.016469351705051993, 'min': 0, 'max': 1}
Summary statistics for has_Playground: {'column': 'has_Playground', 'mean': 0.11393028326818533, 'stddev': 0.3177281672028879, 'min': 0, 'max': 1}
Summary statistics for has_Clubhouse: {'column': 'has_Clubhouse', 'mean': 0.1923892400293417, 'stddev': 0.3941791238298043, 'min': 0, 'max': 1}
Summary statistics for has_Cable_or_Satellite: {'column': 'has_Cable_or_Satellite', 'mean': 0.12602871871137594, 'stddev': 0.3318833945576695, 'min': 0, 'max': 1}
Summary statistics for has_Refrigerator: {'column': 'has_Refrigerator', 'mean': 0.14987389089301326, 'stddev': 0.3569495595114224, 'min': 0, 'max': 1}
Summary statistics for has_Gated: {'column': 'has_Gated', 'mean': 0.08717103610438418, 'stddev': 0.28208694787488064, 'min': 0, 'max': 1}
Summary statistics for has_Pool: {'column': 'has_Pool', 'mean': 0.43822663464533695, 'stddev': 0.49617187057161083, 'min': 0, 'max': 1}
Summary statistics for has_Unknown: {'column': 'has_Unknown', 'mean': 0.15995257091753168, 'stddev': 0.36656390464268557, 'min': 0, 'max': 1}
Summary statistics for has_View: {'column': 'has_View', 'mean': 0.02104163107810726, 'stddev': 0.14352382321525303, 'min': 0, 'max': 1}
Summary statistics for has_Elevator: {'column': 'has_Elevator', 'mean': 0.04360059085382397, 'stddev': 0.20420577453855282, 'min': 0, 'max': 1}
Summary statistics for has_Wood_Floors: {'column': 'has_Wood_Floors', 'mean': 0.08925108272958389, 'stddev': 0.2851072495882814, 'min': 0, 'max': 1}
Summary statistics for has_Internet_Access: {'column': 'has_Internet_Access', 'mean': 0.11154878061034798, 'stddev': 0.3148120805039674, 'min': 0, 'max': 1}
Summary statistics for has_Hot_Tub: {'column': 'has_Hot_Tub', 'mean': 0.04012379794406986, 'stddev': 0.19625051794718615, 'min': 0, 'max': 1}
Summary statistics for has_Storage: {'column': 'has_Storage', 'mean': 0.21792256599374982, 'stddev': 0.4128365703722296, 'min': 0, 'max': 1}
Summary statistics for has_Gym: {'column': 'has_Gym', 'mean': 0.3758955756302943, 'stddev': 0.48435570528543753, 'min': 0, 'max': 1}
Summary statistics for has_Patio/Deck: {'column': 'has_Patio/Deck', 'mean': 0.2668790256941025, 'stddev': 0.44233084608754325, 'min': 0, 'max': 1}
Summary statistics for has_Washer_Dryer: {'column': 'has_Washer_Dryer', 'mean': 0.2620959233095853, 'stddev': 0.43977675440699454, 'min': 0, 'max': 1}
Summary statistics for has_Luxury: {'column': 'has_Luxury', 'mean': 0.002080046625199715, 'stddev': 0.045560299487497735, 'min': 0, 'max': 1}
Summary statistics for has_Dishwasher: {'column': 'has_Dishwasher', 'mean': 0.1667755257895636, 'stddev': 0.37277720713600654, 'min': 0, 'max': 1}
Summary statistics for has_Garbage_Disposal: {'column': 'has_Garbage_Disposal', 'mean': 0.03893807088236181, 'stddev': 0.19344837439825108, 'min': 0, 'max': 1}
Summary statistics for has_Doorman: {'column': 'has_Doorman', 'mean': 0.002190580503833516, 'stddev': 0.04675258094495654, 'min': 0, 'max': 1}
Summary statistics for has_photo_no: {'column': 'has_photo_no', 'mean': 0.09274797270818051, 'stddev': 0.2900803885426063, 'min': 0, 'max': 1}
Summary statistics for has_AC: {'column': 'has_AC', 'mean': 0.15915873669825256, 'stddev': 0.36582588483039546, 'min': 0, 'max': 1}
Summary statistics for has_photo_yes: {'column': 'has_photo_yes', 'mean': 0.5628184129344735, 'stddev': 0.4960406429938882, 'min': 0, 'max': 1}
Summary statistics for has_Fireplace: {'column': 'has_Fireplace', 'mean': 0.15031602640754846, 'stddev': 0.357382710879453, 'min': 0, 'max': 1}
Summary statistics for pets_allowed_Yes: {'column': 'pets_allowed_Yes', 'mean': 0.9495161630676165, 'stddev': 0.21894223170623736, 'min': 0, 'max': 1}

Summary Statistics for all columns:
{'column': 'bedrooms', 'mean': 1.7257453500406965, 'stddev': 0.7504689092373312, 'min': 0, 'max': 9}
{'column': 'week_of_month', 'mean': 3.1619924234050463, 'stddev': 0.8342202772785027, 'min': 1, 'max': 5}
{'column': 'bathrooms', 'mean': 1.4446275510716762, 'stddev': 0.5477713573635926, 'min': 0.0, 'max': 9.0}
{'column': 'has_Tennis', 'mean': 0.08566375594119598, 'stddev': 0.27986829746043457, 'min': 0, 'max': 1}
{'column': 'square_feet', 'mean': 956.05147864184, 'stddev': 387.2192844112672, 'min': 101.0, 'max': 40000.0}
{'column': 'price', 'mean': 1525.5443189605796, 'stddev': 902.0558524870771, 'min': 100.0, 'max': 52500.0}
{'column': 'longitude', 'mean': -91.48728659826962, 'stddev': 15.737561003856273, 'min': -124.2265, 'max': -68.7788}
{'column': 'latitude', 'mean': 36.934092578152516, 'stddev': 4.560699440151283, 'min': 24.5645, 'max': 48.8467}
{'column': 'has_Parking', 'mean': 0.44104022428328826, 'stddev': 0.4965140703692728, 'min': 0, 'max': 1}
{'column': 'has_Alarm', 'mean': 0.0036576665293366964, 'stddev': 0.06036824185793644, 'min': 0, 'max': 1}
{'column': 'has_TV', 'mean': 0.0452786961021735, 'stddev': 0.20791577662422658, 'min': 0, 'max': 1}
{'column': 'has_Golf', 'mean': 0.00027131042937387584, 'stddev': 0.016469351705051993, 'min': 0, 'max': 1}
{'column': 'has_Playground', 'mean': 0.11393028326818533, 'stddev': 0.3177281672028879, 'min': 0, 'max': 1}
{'column': 'has_Clubhouse', 'mean': 0.1923892400293417, 'stddev': 0.3941791238298043, 'min': 0, 'max': 1}
{'column': 'has_Cable_or_Satellite', 'mean': 0.12602871871137594, 'stddev': 0.3318833945576695, 'min': 0, 'max': 1}
{'column': 'has_Refrigerator', 'mean': 0.14987389089301326, 'stddev': 0.3569495595114224, 'min': 0, 'max': 1}
{'column': 'has_Gated', 'mean': 0.08717103610438418, 'stddev': 0.28208694787488064, 'min': 0, 'max': 1}
{'column': 'has_Pool', 'mean': 0.43822663464533695, 'stddev': 0.49617187057161083, 'min': 0, 'max': 1}
{'column': 'has_Unknown', 'mean': 0.15995257091753168, 'stddev': 0.36656390464268557, 'min': 0, 'max': 1}
{'column': 'has_View', 'mean': 0.02104163107810726, 'stddev': 0.14352382321525303, 'min': 0, 'max': 1}
{'column': 'has_Elevator', 'mean': 0.04360059085382397, 'stddev': 0.20420577453855282, 'min': 0, 'max': 1}
{'column': 'has_Wood_Floors', 'mean': 0.08925108272958389, 'stddev': 0.2851072495882814, 'min': 0, 'max': 1}
{'column': 'has_Internet_Access', 'mean': 0.11154878061034798, 'stddev': 0.3148120805039674, 'min': 0, 'max': 1}
{'column': 'has_Hot_Tub', 'mean': 0.04012379794406986, 'stddev': 0.19625051794718615, 'min': 0, 'max': 1}
{'column': 'has_Storage', 'mean': 0.21792256599374982, 'stddev': 0.4128365703722296, 'min': 0, 'max': 1}
{'column': 'has_Gym', 'mean': 0.3758955756302943, 'stddev': 0.48435570528543753, 'min': 0, 'max': 1}
{'column': 'has_Patio/Deck', 'mean': 0.2668790256941025, 'stddev': 0.44233084608754325, 'min': 0, 'max': 1}
{'column': 'has_Washer_Dryer', 'mean': 0.2620959233095853, 'stddev': 0.43977675440699454, 'min': 0, 'max': 1}
{'column': 'has_Luxury', 'mean': 0.002080046625199715, 'stddev': 0.045560299487497735, 'min': 0, 'max': 1}
{'column': 'has_Dishwasher', 'mean': 0.1667755257895636, 'stddev': 0.37277720713600654, 'min': 0, 'max': 1}
{'column': 'has_Garbage_Disposal', 'mean': 0.03893807088236181, 'stddev': 0.19344837439825108, 'min': 0, 'max': 1}
{'column': 'has_Doorman', 'mean': 0.002190580503833516, 'stddev': 0.04675258094495654, 'min': 0, 'max': 1}
{'column': 'has_photo_no', 'mean': 0.09274797270818051, 'stddev': 0.2900803885426063, 'min': 0, 'max': 1}
{'column': 'has_AC', 'mean': 0.15915873669825256, 'stddev': 0.36582588483039546, 'min': 0, 'max': 1}
{'column': 'has_photo_yes', 'mean': 0.5628184129344735, 'stddev': 0.4960406429938882, 'min': 0, 'max': 1}
{'column': 'has_Fireplace', 'mean': 0.15031602640754846, 'stddev': 0.357382710879453, 'min': 0, 'max': 1}
{'column': 'pets_allowed_Yes', 'mean': 0.9495161630676165, 'stddev': 0.21894223170623736, 'min': 0, 'max': 1}
In [ ]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType

# Create a Spark session if you haven't already
spark_res_df = SparkSession.builder.getOrCreate()

def convert_to_float(value):
    if isinstance(value, (int, float)):
        return float(value)
    else:
        return value  # Return as-is for other types (like strings)

# Preprocess the data
processed_summary_results = []
for item in summary_results:
    processed_item = {
        "column": item["column"],
        "mean": convert_to_float(item["mean"]),
        "stddev": convert_to_float(item["stddev"]),
        "min": convert_to_float(item["min"]),
        "max": convert_to_float(item["max"])
    }
    processed_summary_results.append(processed_item)

# Define the schema
schema = StructType([
    StructField("column", StringType(), True),
    StructField("mean", DoubleType(), True),
    StructField("stddev", DoubleType(), True),
    StructField("min", DoubleType(), True),
    StructField("max", DoubleType(), True)
])

# Create the Spark DataFrame
summary_result_df = spark_res_df.createDataFrame(processed_summary_results, schema)

# Show the DataFrame
summary_result_df.show()
+--------------------+--------------------+--------------------+---------+--------+
|              column|                mean|              stddev|      min|     max|
+--------------------+--------------------+--------------------+---------+--------+
|            bedrooms|  1.7257453500406965|  0.7504689092373312|      0.0|     9.0|
|       week_of_month|  3.1619924234050463|  0.8342202772785027|      1.0|     5.0|
|           bathrooms|  1.4446275510716762|  0.5477713573635926|      0.0|     9.0|
|          has_Tennis| 0.08566375594119598| 0.27986829746043457|      0.0|     1.0|
|         square_feet|     956.05147864184|   387.2192844112672|    101.0| 40000.0|
|               price|  1525.5443189605796|   902.0558524870771|    100.0| 52500.0|
|           longitude|  -91.48728659826962|  15.737561003856273|-124.2265|-68.7788|
|            latitude|  36.934092578152516|   4.560699440151283|  24.5645| 48.8467|
|         has_Parking| 0.44104022428328826|  0.4965140703692728|      0.0|     1.0|
|           has_Alarm|0.003657666529336...| 0.06036824185793644|      0.0|     1.0|
|              has_TV|  0.0452786961021735| 0.20791577662422658|      0.0|     1.0|
|            has_Golf|2.713104293738758...|0.016469351705051993|      0.0|     1.0|
|      has_Playground| 0.11393028326818533|  0.3177281672028879|      0.0|     1.0|
|       has_Clubhouse|  0.1923892400293417|  0.3941791238298043|      0.0|     1.0|
|has_Cable_or_Sate...| 0.12602871871137594|  0.3318833945576695|      0.0|     1.0|
|    has_Refrigerator| 0.14987389089301326|  0.3569495595114224|      0.0|     1.0|
|           has_Gated| 0.08717103610438418| 0.28208694787488064|      0.0|     1.0|
|            has_Pool| 0.43822663464533695| 0.49617187057161083|      0.0|     1.0|
|         has_Unknown| 0.15995257091753168| 0.36656390464268557|      0.0|     1.0|
|            has_View| 0.02104163107810726| 0.14352382321525303|      0.0|     1.0|
+--------------------+--------------------+--------------------+---------+--------+
only showing top 20 rows

In [ ]:
summary_result_df.display()
columnmeanstddevminmax
bedrooms1.72574535004069650.75046890923733120.09.0
week_of_month3.16199242340504630.83422027727850271.05.0
bathrooms1.44462755107167620.54777135736359260.09.0
has_Tennis0.085663755941195980.279868297460434570.01.0
square_feet956.05147864184387.2192844112672101.040000.0
price1525.5443189605796902.0558524870771100.052500.0
longitude-91.4872865982696215.737561003856273-124.2265-68.7788
latitude36.9340925781525164.56069944015128324.564548.8467
has_Parking0.441040224283288260.49651407036927280.01.0
has_Alarm0.00365766652933669640.060368241857936440.01.0
has_TV0.04527869610217350.207915776624226580.01.0
has_Golf2.7131042937387584E-40.0164693517050519930.01.0
has_Playground0.113930283268185330.31772816720288790.01.0
has_Clubhouse0.19238924002934170.39417912382980430.01.0
has_Cable_or_Satellite0.126028718711375940.33188339455766950.01.0
has_Refrigerator0.149873890893013260.35694955951142240.01.0
has_Gated0.087171036104384180.282086947874880640.01.0
has_Pool0.438226634645336950.496171870571610830.01.0
has_Unknown0.159952570917531680.366563904642685570.01.0
has_View0.021041631078107260.143523823215253030.01.0
has_Elevator0.043600590853823970.204205774538552820.01.0
has_Wood_Floors0.089251082729583890.28510724958828140.01.0
has_Internet_Access0.111548780610347980.31481208050396740.01.0
has_Hot_Tub0.040123797944069860.196250517947186150.01.0
has_Storage0.217922565993749820.41283657037222960.01.0
has_Gym0.37589557563029430.484355705285437530.01.0
has_Patio/Deck0.26687902569410250.442330846087543250.01.0
has_Washer_Dryer0.26209592330958530.439776754406994540.01.0
has_Luxury0.0020800466251997150.0455602994874977350.01.0
has_Dishwasher0.16677552578956360.372777207136006540.01.0
has_Garbage_Disposal0.038938070882361810.193448374398251080.01.0
has_Doorman0.0021905805038335160.046752580944956540.01.0
has_photo_no0.092747972708180510.29008038854260630.01.0
has_AC0.159158736698252560.365825884830395460.01.0
has_photo_yes0.56281841293447350.49604064299388820.01.0
has_Fireplace0.150316026407548460.3573827108794530.01.0
pets_allowed_Yes0.94951616306761650.218942231706237360.01.0
In [ ]:
numeric_df.describe()
Out[ ]:
bathrooms bedrooms price square_feet latitude longitude week_of_month has_Tennis has_Parking has_Alarm has_Golf has_TV has_Clubhouse has_Playground has_Refrigerator has_Cable_or_Satellite has_Unknown has_Gated has_Pool has_Wood_Floors has_Internet_Access has_View has_Elevator has_Hot_Tub has_Gym has_Storage has_Doorman has_Dishwasher has_Washer_Dryer has_Patio/Deck has_Garbage_Disposal has_Luxury has_AC has_Fireplace has_photo_no has_photo_yes pets_allowed_Yes
count 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000 99517.00000 99517.000000 99517.000000 99517.000000 99517.000000 99517.000000
mean 1.444628 1.725745 1525.544319 956.051479 36.934093 -91.487287 3.161992 0.085664 0.441040 0.003658 0.000271 0.045279 0.192389 0.113930 0.149874 0.126029 0.159953 0.087171 0.438227 0.089251 0.111549 0.021042 0.043601 0.040124 0.375896 0.217923 0.002191 0.166776 0.262096 0.266879 0.038938 0.00208 0.159159 0.150316 0.092748 0.562818 0.949516
std 0.547771 0.750469 902.055852 387.219284 4.560699 15.737561 0.834220 0.279868 0.496514 0.060368 0.016469 0.207916 0.394179 0.317728 0.356950 0.331883 0.366564 0.282087 0.496172 0.285107 0.314812 0.143524 0.204206 0.196251 0.484356 0.412837 0.046753 0.372777 0.439777 0.442331 0.193448 0.04556 0.365826 0.357383 0.290080 0.496041 0.218942
min 0.000000 0.000000 100.000000 101.000000 24.564500 -124.226500 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 1.000000 1.000000 1014.000000 730.000000 33.746500 -104.791900 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 1.000000
50% 1.000000 2.000000 1350.000000 900.000000 37.213900 -84.538200 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 1.000000 1.000000
75% 2.000000 2.000000 1795.000000 1115.000000 39.955900 -77.569900 4.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000 0.00000 0.000000 0.000000 0.000000 1.000000 1.000000
max 9.000000 9.000000 52500.000000 40000.000000 48.846700 -68.778800 5.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 1.000000 1.000000 1.000000 1.000000 1.000000

Linear Regression across Dataset¶

In [ ]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, when
from pyspark.ml.linalg import Vectors, VectorUDT
from pyspark.ml.feature import StandardScaler, VectorAssembler
from pyspark.ml.regression import LinearRegression
import pyspark.pandas as ps

# Ensure Spark session is created
spark = SparkSession.builder \
    .appName("Linear Regression with Manual Feature Assembly") \
    .config("spark.sql.shuffle.partitions", "200")  \
    .getOrCreate()

# Converting pandas-on-Spark DataFrame to a Spark DataFrame
vector_df = numeric_df.to_spark()

# Define the target column
target_col = "price"

# Manually create one-hot encoded columns for week_of_month
for i in range(1, 5):  # Assuming weeks are 1-4
    vector_df = vector_df.withColumn(f"week_{i}", when(col("week_of_month") == i, 1).otherwise(0))

# Standardize 'square feet'
assembler = VectorAssembler(inputCols=["square_feet"], outputCol="square_feet_vec")
scaler = StandardScaler(inputCol="square_feet_vec", outputCol="scaled_square_feet")
vector_df = assembler.transform(vector_df)
scaler_model = scaler.fit(vector_df)
vector_df = scaler_model.transform(vector_df)

# Extract the scaled square feet from the vector
get_first = udf(lambda v: float(v[0]), returnType=DoubleType())
vector_df = vector_df.withColumn("scaled_square_feet", get_first("scaled_square_feet"))

# Define the order of features
feature_cols = ['latitude', 'longitude', 'has_Tennis', 'has_Parking', 'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse','has_Playground', 'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown','has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access', 'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage', 'has_Doorman','has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck', 'has_Garbage_Disposal','has_Luxury', 'has_AC', 'has_Fireplace', 'has_photo_no', 'has_photo_yes','pets_allowed_Yes', 'bathrooms', 'bedrooms', 'scaled_square_feet', 'week_1', 'week_2', 'week_3', 'week_4']

# Create a UDF to assemble features into a vector
@udf(returnType=VectorUDT())
def assemble_features(*cols):
    return Vectors.dense([float(c) for c in cols])

# Apply the UDF to create the feature vector
vector_df = vector_df.withColumn("features", assemble_features(*feature_cols))

# Select the features and target columns for training
train_data = vector_df.select("features", target_col)

# Examine the structure of train_data
print("\nStructure of train_data:")
train_data.show(5, truncate=False)

# Print the number of features in train_data
num_features = len(feature_cols)
print(f"\nNumber of features in train_data: {num_features}")

# Set up and train the Linear Regression model
lr = LinearRegression(featuresCol="features", labelCol=target_col)
lr_model = lr.fit(train_data)

# Extract coefficients and intercept
coefficients = lr_model.coefficients
intercept = lr_model.intercept

print(f"\nNumber of coefficients: {len(coefficients)}")

# Compare the number of features and coefficients
if num_features == len(coefficients):
    print("The number of features matches the number of coefficients.")
else:
    print(f"Mismatch: {num_features} features vs {len(coefficients)} coefficients.")

# Get the summary of the model
training_summary = lr_model.summary

# Extract summary statistics
r_squared = training_summary.r2
adjusted_r_squared = training_summary.r2adj
rmse = training_summary.rootMeanSquaredError
mae = training_summary.meanAbsoluteError
explained_variance = training_summary.explainedVariance

# Create a pandas-on-Spark DataFrame for coefficients including the intercept
coeff_psdf = ps.DataFrame({
    "Feature": feature_cols + ["Intercept"],
    "Coefficient": list(coefficients) + [intercept]
})

# Add a new column for the absolute value of coefficients and sort by it
coeff_psdf['abs_coefficient'] = coeff_psdf['Coefficient'].abs()
coeff_psdf = coeff_psdf.sort_values(by='abs_coefficient', ascending=False).drop(columns=['abs_coefficient'])

# Create a pandas-on-Spark DataFrame for summary statistics
summary_stats = {
    "R-squared": [r_squared],
    "Adjusted R-squared": [adjusted_r_squared],
    "RMSE": [rmse],
    "MAE": [mae],
    "Explained Variance": [explained_variance],
    "Total Iterations": [training_summary.totalIterations]
}

summary_stats_psdf = ps.DataFrame(summary_stats)

# Display the DataFrames
print("\nCoefficients:")
print(coeff_psdf)
print("\nModel Summary Statistics:")
print(summary_stats_psdf)
# Uncache the DataFrame
train_data.unpersist()
Structure of train_data:
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+
|features                                                                                                                                                                             |price |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+
|[39.0342,-94.5429,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,3.0,6.456290016136541,0.0,0.0,0.0,1.0] |800.0 |
|[37.5423,-77.4347,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,2.0,2.0,2.259701505647789,0.0,0.0,0.0,1.0] |1000.0|
|[33.9222,-84.0725,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0143624850346007,0.0,0.0,1.0,0.0]|1017.0|
|[36.1599,-78.8975,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.879505347196897,0.0,0.0,1.0,0.0] |1023.0|
|[28.0395,-82.3952,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.5,2.0,3.3572708083910014,0.0,0.0,0.0,1.0]|1025.0|
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+
only showing top 5 rows


Number of features in train_data: 39
Downloading artifacts:   0%|          | 0/15 [00:00<?, ?it/s]
Uploading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]
Number of coefficients: 39
The number of features matches the number of coefficients.

Coefficients:
                   Feature  Coefficient
21             has_Doorman  1148.716768
39               Intercept  -714.878015
17            has_Elevator   499.418298
34      scaled_square_feet   332.941781
32               bathrooms   263.245148
14         has_Wood_Floors   237.347215
8           has_Playground  -202.845298
4                has_Alarm   201.098182
16                has_View   198.102579
25    has_Garbage_Disposal  -156.083536
38                  week_4  -152.516798
7            has_Clubhouse  -136.133296
33                bedrooms  -119.110003
5                 has_Golf   115.999245
36                  week_2   -98.548117
6                   has_TV    97.574141
19                 has_Gym    90.706536
23        has_Washer_Dryer   -88.462153
28           has_Fireplace   -73.894957
29            has_photo_no    65.849407
10  has_Cable_or_Satellite   -59.640026
24          has_Patio/Deck    58.142950
35                  week_1    56.487169
20             has_Storage   -44.021378
26              has_Luxury    42.007378
18             has_Hot_Tub    39.875829
9         has_Refrigerator   -38.136788
3              has_Parking    26.813819
37                  week_3    19.927478
15     has_Internet_Access    18.751996
27                  has_AC   -16.719452
2               has_Tennis   -15.076323
12               has_Gated    14.511717
0                 latitude    14.481363
30           has_photo_yes    10.668124
13                has_Pool    -8.475591
1                longitude    -8.217182
11             has_Unknown     4.414043
31        pets_allowed_Yes     1.477132
22          has_Dishwasher    -0.199086

Model Summary Statistics:
   R-squared  Adjusted R-squared        RMSE         MAE  Explained Variance  Total Iterations
0   0.249791            0.249497  781.308281  477.632184       203253.954134                 0
Out[ ]:
DataFrame[features: vector, price: double]
In [ ]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, udf, monotonically_increasing_id
from pyspark.sql.types import DoubleType
from pyspark.ml.feature import StandardScaler, VectorAssembler
from pyspark.ml.linalg import VectorUDT
import pyspark.pandas as ps

# Ensure Spark session is created
spark = SparkSession.builder \
    .appName("Feature Preparation for Analysis") \
    .config("spark.sql.shuffle.partitions", "200")  \
    .getOrCreate()

# Converting pandas-on-Spark DataFrame to a Spark DataFrame
vector_df = numeric_df.to_spark()

# Define the target column
target_col = "price"

# Add a unique ID column
vector_df = vector_df.withColumn("unique_id", monotonically_increasing_id())

# Manually create one-hot encoded columns for week_of_month
for i in range(1, 5):  # Assuming weeks are 1-4
    vector_df = vector_df.withColumn(f"week_{i}", when(col("week_of_month") == i, 1).otherwise(0))

# Standardize 'square feet'
assembler = VectorAssembler(inputCols=["square_feet"], outputCol="square_feet_vec")
scaler = StandardScaler(inputCol="square_feet_vec", outputCol="scaled_square_feet")
vector_df = assembler.transform(vector_df)
scaler_model = scaler.fit(vector_df)
vector_df = scaler_model.transform(vector_df)

# Create a UDF to extract the first (and only) element from the vector
@udf(returnType=DoubleType())
def extract_from_vector(v):
    return float(v[0])

# Extract the scaled square feet from the vector
vector_df = vector_df.withColumn("scaled_square_feet", extract_from_vector(col("scaled_square_feet")))

# Define the columns we want to keep
columns_to_keep = [
    'unique_id',
    'latitude', 'longitude', 
    'has_Tennis', 'has_Parking', 'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse',
    'has_Playground', 'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
    'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access', 'has_View',
    'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage', 'has_Doorman',
    'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck', 'has_Garbage_Disposal',
    'has_Luxury', 'has_AC', 'has_Fireplace', 'has_photo_no', 'has_photo_yes',
    'pets_allowed_Yes', 
    'bathrooms', 'bedrooms', 'scaled_square_feet',
    'week_1', 'week_2', 'week_3', 'week_4',
    target_col
]

# Create the final DataFrame with only the columns we need
final_df = vector_df.select(columns_to_keep)

# Convert to pandas-on-Spark DataFrame for easier viewing
final_psdf = final_df.pandas_api()

# Display info about the final DataFrame
print(final_psdf.info())

# Display the first few rows of the final DataFrame
print("\nFirst few rows of the final DataFrame:")
final_psdf.head()

# Save the final DataFrame for further analysis
# Uncomment the following line if you want to save it as a CSV file
# final_psdf.to_csv('prepared_rental_data.csv', index=False)
<class 'pyspark.pandas.frame.DataFrame'>
Int64Index: 99517 entries, 0 to 95562
Data columns (total 41 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   unique_id               99517 non-null  int64  
 1   latitude                99517 non-null  float64
 2   longitude               99517 non-null  float64
 3   has_Tennis              99517 non-null  int32  
 4   has_Parking             99517 non-null  int32  
 5   has_Alarm               99517 non-null  int32  
 6   has_Golf                99517 non-null  int32  
 7   has_TV                  99517 non-null  int32  
 8   has_Clubhouse           99517 non-null  int32  
 9   has_Playground          99517 non-null  int32  
 10  has_Refrigerator        99517 non-null  int32  
 11  has_Cable_or_Satellite  99517 non-null  int32  
 12  has_Unknown             99517 non-null  int32  
 13  has_Gated               99517 non-null  int32  
 14  has_Pool                99517 non-null  int32  
 15  has_Wood_Floors         99517 non-null  int32  
 16  has_Internet_Access     99517 non-null  int32  
 17  has_View                99517 non-null  int32  
 18  has_Elevator            99517 non-null  int32  
 19  has_Hot_Tub             99517 non-null  int32  
 20  has_Gym                 99517 non-null  int32  
 21  has_Storage             99517 non-null  int32  
 22  has_Doorman             99517 non-null  int32  
 23  has_Dishwasher          99517 non-null  int32  
 24  has_Washer_Dryer        99517 non-null  int32  
 25  has_Patio/Deck          99517 non-null  int32  
 26  has_Garbage_Disposal    99517 non-null  int32  
 27  has_Luxury              99517 non-null  int32  
 28  has_AC                  99517 non-null  int32  
 29  has_Fireplace           99517 non-null  int32  
 30  has_photo_no            99517 non-null  int32  
 31  has_photo_yes           99517 non-null  int32  
 32  pets_allowed_Yes        99517 non-null  int32  
 33  bathrooms               99517 non-null  float64
 34  bedrooms                99517 non-null  int32  
 35  scaled_square_feet      99517 non-null  float64
 36  week_1                  99517 non-null  int32  
 37  week_2                  99517 non-null  int32  
 38  week_3                  99517 non-null  int32  
 39  week_4                  99517 non-null  int32  
 40  price                   99517 non-null  float64
dtypes: float64(5), int32(35), int64(1)None

First few rows of the final DataFrame:
Out[ ]:
unique_id latitude longitude has_Tennis has_Parking has_Alarm has_Golf has_TV has_Clubhouse has_Playground has_Refrigerator has_Cable_or_Satellite has_Unknown has_Gated has_Pool has_Wood_Floors has_Internet_Access has_View has_Elevator has_Hot_Tub has_Gym has_Storage has_Doorman has_Dishwasher has_Washer_Dryer has_Patio/Deck has_Garbage_Disposal has_Luxury has_AC has_Fireplace has_photo_no has_photo_yes pets_allowed_Yes bathrooms bedrooms scaled_square_feet week_1 week_2 week_3 week_4 price
0 0 39.0342 -94.5429 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1.0 3 6.456290 0 0 0 1 800.0
1 1 37.5423 -77.4347 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 1 2.0 2 2.259702 0 0 0 1 1000.0
2 2 33.9222 -84.0725 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 1 1 1 0 0 1 1 0 0 1 1.0 1 2.014362 0 0 1 0 1017.0
3 3 36.1599 -78.8975 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 2.0 2 2.879505 0 0 1 0 1023.0
4 4 28.0395 -82.3952 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1.5 2 3.357271 0 0 0 1 1025.0
In [ ]:
summary_stats_psdf.display()
coeff_psdf.display()
R-squaredAdjusted R-squaredRMSEMAEExplained VarianceTotal Iterations
0.24979084097762050.24949672115895016781.3082812382936477.6321843139007203253.954134357630
FeatureCoefficient
has_Doorman1148.7167676165839
Intercept-714.878014571396
has_Elevator499.41829834028454
scaled_square_feet332.94178089917335
bathrooms263.2451481138218
has_Wood_Floors237.34721530559665
has_Playground-202.84529822548024
has_Alarm201.09818175498555
has_View198.1025786087673
has_Garbage_Disposal-156.08353562283395
week_4-152.51679771144896
has_Clubhouse-136.13329618871902
bedrooms-119.11000269091117
has_Golf115.99924525105642
week_2-98.54811710308732
has_TV97.57414134223637
has_Gym90.706535685193
has_Washer_Dryer-88.46215297666268
has_Fireplace-73.89495668117976
has_photo_no65.84940700298462
has_Cable_or_Satellite-59.640025604448525
has_Patio/Deck58.14295008391875
week_156.48716931487229
has_Storage-44.021378199388494
has_Luxury42.00737779701831
has_Hot_Tub39.87582885358747
has_Refrigerator-38.1367880892332
has_Parking26.81381941385856
week_319.927477921930628
has_Internet_Access18.75199634433781
has_AC-16.719452095245575
has_Tennis-15.076323314998662
has_Gated14.511716773885327
latitude14.481362792222722
has_photo_yes10.668123624570054
has_Pool-8.475591367472179
longitude-8.217181919323505
has_Unknown4.4140434411031215
pets_allowed_Yes1.4771320751831025
has_Dishwasher-0.19908565558793928

Scaling Data, Silhouette, Kmeans and Clustering¶

We had some optimization issues which we solved with some spark caching, threadpooling and parallelization.

In [ ]:
import pyspark.pandas as ps
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.clustering import KMeans
from pyspark.ml.evaluation import ClusteringEvaluator
from pyspark.sql import SparkSession
import time
from concurrent.futures import ThreadPoolExecutor

# Assuming you have a Spark session already created
spark = SparkSession.builder.getOrCreate()

# Assuming numeric_df is your pandas-on-Spark DataFrame
# Step 1: Create a unique_id column
# numeric_df = numeric_df.reset_index(drop=True)
# numeric_df['unique_id'] = numeric_df.index
#
# Step 2: Select the necessary columns
df = final_psdf[['unique_id', 'latitude', 'longitude']]

# Convert pandas-on-Spark DataFrame to Spark DataFrame for MLlib compatibility
df_spark = df.to_spark()

# Assemble the features
vector_assembler = VectorAssembler(inputCols=['latitude', 'longitude'], outputCol='features')
df_spark = vector_assembler.transform(df_spark)

# Cache the DataFrame to optimize performance
df_spark.cache()

# Step 3: Define a function to perform k-means clustering and compute silhouette score and WCSS
def compute_metrics(k, df_spark):
    evaluator = ClusteringEvaluator()
    start_time = time.time()
    kmeans = KMeans(k=k, seed=42)
    model = kmeans.fit(df_spark)
    transformed = model.transform(df_spark)
    silhouette = evaluator.evaluate(transformed)
    wcss = model.summary.trainingCost
    end_time = time.time()
    elapsed_time = end_time - start_time
    return k, silhouette, wcss, elapsed_time

# Create a list of k values
k_values = list(range(2, 81))

# Step 4: Use ThreadPoolExecutor to parallelize metric computation
metrics = []

def parallel_compute_metrics(k):
    return compute_metrics(k, df_spark)

with ThreadPoolExecutor(max_workers=8) as executor:  # Adjust max_workers based on your cluster
    results = list(executor.map(parallel_compute_metrics, k_values))

metrics = results

# Print metrics and computation times
for k, silhouette, wcss, time_taken in metrics:
    print(f"k: {k}, Silhouette Score: {silhouette}, WCSS: {wcss}, Time Taken: {time_taken} seconds")

# Step 5: Determine the optimal k based on silhouette scores
optimal_k = max(metrics, key=lambda x: x[1])[0]
print(f"Optimal k based on Silhouette Score: {optimal_k}")

# Perform k-means clustering with the optimal k
kmeans = KMeans(k=optimal_k, seed=42)
model = kmeans.fit(df_spark)
df_spark = model.transform(df_spark)

# Convert back to pandas-on-Spark DataFrame if needed
result_df = ps.DataFrame(df_spark.select('unique_id', 'latitude', 'longitude', 'prediction'))

# Show the resultsss
result_df.head()

# Clean up
df_spark.unpersist()
Downloading artifacts:   0%|          | 0/15 [00:00<?, ?it/s]
Uploading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]
Downloading artifacts:   0%|          | 0/15 [00:00<?, ?it/s]
Uploading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]
k: 2, Silhouette Score: 0.763449034248761, WCSS: 7819512.118361829, Time Taken: 12.302191257476807 seconds
k: 3, Silhouette Score: 0.7066676965898263, WCSS: 4130639.958093457, Time Taken: 56.06333518028259 seconds
k: 4, Silhouette Score: 0.626529195061003, WCSS: 3066695.262633371, Time Taken: 12.9237060546875 seconds
k: 5, Silhouette Score: 0.6570225770709357, WCSS: 2184460.5820053252, Time Taken: 12.365989923477173 seconds
k: 6, Silhouette Score: 0.674843772590051, WCSS: 1734666.9166672565, Time Taken: 12.941851377487183 seconds
k: 7, Silhouette Score: 0.6453930030584214, WCSS: 1369393.9327533566, Time Taken: 13.146583557128906 seconds
k: 8, Silhouette Score: 0.6668562607779775, WCSS: 1375699.5277522383, Time Taken: 12.790895223617554 seconds
k: 9, Silhouette Score: 0.7479653824112578, WCSS: 796316.6247356371, Time Taken: 14.169133424758911 seconds
k: 10, Silhouette Score: 0.718012839465669, WCSS: 797762.747828376, Time Taken: 6.459659814834595 seconds
k: 11, Silhouette Score: 0.7081238587805871, WCSS: 757223.6080651081, Time Taken: 6.1843955516815186 seconds
k: 12, Silhouette Score: 0.7297178584358022, WCSS: 526125.8994965373, Time Taken: 5.58376669883728 seconds
k: 13, Silhouette Score: 0.7730932477821403, WCSS: 459962.5308252313, Time Taken: 7.0191755294799805 seconds
k: 14, Silhouette Score: 0.6973491100003041, WCSS: 510970.3514817107, Time Taken: 6.999093055725098 seconds
k: 15, Silhouette Score: 0.7488379733628568, WCSS: 369717.7783567335, Time Taken: 6.379661798477173 seconds
k: 16, Silhouette Score: 0.7576676038945148, WCSS: 315268.5092324907, Time Taken: 5.886139869689941 seconds
k: 17, Silhouette Score: 0.7752504884542833, WCSS: 291043.90755458834, Time Taken: 6.939961910247803 seconds
k: 18, Silhouette Score: 0.7818385047999875, WCSS: 255229.731579469, Time Taken: 8.048190116882324 seconds
k: 19, Silhouette Score: 0.7680298830490281, WCSS: 357955.1545308105, Time Taken: 5.2976391315460205 seconds
k: 20, Silhouette Score: 0.7926414459045124, WCSS: 208535.88127667134, Time Taken: 9.10690450668335 seconds
k: 21, Silhouette Score: 0.7789597054791254, WCSS: 205638.80449035042, Time Taken: 9.247199058532715 seconds
k: 22, Silhouette Score: 0.7977354965000903, WCSS: 183427.20801378967, Time Taken: 8.668189525604248 seconds
k: 23, Silhouette Score: 0.8046357428406448, WCSS: 183530.45723809785, Time Taken: 6.809260845184326 seconds
k: 24, Silhouette Score: 0.7439339669693564, WCSS: 197258.19261235185, Time Taken: 7.405336618423462 seconds
k: 25, Silhouette Score: 0.7767917401232765, WCSS: 169568.61301595188, Time Taken: 6.740826606750488 seconds
k: 26, Silhouette Score: 0.7932025975963954, WCSS: 148493.87824072098, Time Taken: 8.219924449920654 seconds
k: 27, Silhouette Score: 0.7960708453693889, WCSS: 164643.6345777706, Time Taken: 7.513131618499756 seconds
k: 28, Silhouette Score: 0.8340780235239595, WCSS: 118442.9851823928, Time Taken: 7.094217300415039 seconds
k: 29, Silhouette Score: 0.8330482192122055, WCSS: 116934.47893890175, Time Taken: 7.004460096359253 seconds
k: 30, Silhouette Score: 0.8171113636663343, WCSS: 110803.65912103826, Time Taken: 8.085708379745483 seconds
k: 31, Silhouette Score: 0.816601895192477, WCSS: 108347.51366863374, Time Taken: 6.879997253417969 seconds
k: 32, Silhouette Score: 0.8018064277801296, WCSS: 102294.49071926378, Time Taken: 6.232992887496948 seconds
k: 33, Silhouette Score: 0.804207924790133, WCSS: 96471.49709637574, Time Taken: 7.1942408084869385 seconds
k: 34, Silhouette Score: 0.8279710541479418, WCSS: 87438.47453428664, Time Taken: 8.69472885131836 seconds
k: 35, Silhouette Score: 0.8159013280217633, WCSS: 92001.43781252524, Time Taken: 6.730831623077393 seconds
k: 36, Silhouette Score: 0.797526439419168, WCSS: 78505.13722900697, Time Taken: 8.021164178848267 seconds
k: 37, Silhouette Score: 0.8087660877390394, WCSS: 78845.45651923312, Time Taken: 7.640315055847168 seconds
k: 38, Silhouette Score: 0.7971209280863393, WCSS: 74982.47259881966, Time Taken: 7.682674884796143 seconds
k: 39, Silhouette Score: 0.7864781414727542, WCSS: 77008.91499213867, Time Taken: 8.548113584518433 seconds
k: 40, Silhouette Score: 0.792104573140613, WCSS: 73386.27450628248, Time Taken: 7.544145345687866 seconds
k: 41, Silhouette Score: 0.7814244271658701, WCSS: 73820.90253733854, Time Taken: 7.601768970489502 seconds
k: 42, Silhouette Score: 0.7754729563063464, WCSS: 73284.59884842591, Time Taken: 7.47021484375 seconds
k: 43, Silhouette Score: 0.7913512716659951, WCSS: 65430.468663536, Time Taken: 12.877533435821533 seconds
k: 44, Silhouette Score: 0.7548145174390559, WCSS: 70038.20725082759, Time Taken: 17.766409397125244 seconds
k: 45, Silhouette Score: 0.8200907979124444, WCSS: 57558.94401610538, Time Taken: 15.773146629333496 seconds
k: 46, Silhouette Score: 0.8068605977546413, WCSS: 52397.45705784256, Time Taken: 16.517797708511353 seconds
k: 47, Silhouette Score: 0.7900390667040961, WCSS: 54484.19707486911, Time Taken: 17.90798020362854 seconds
k: 48, Silhouette Score: 0.8003772678172559, WCSS: 54661.586510008055, Time Taken: 17.240264177322388 seconds
k: 49, Silhouette Score: 0.7948315777622762, WCSS: 50583.71699534971, Time Taken: 55.32450461387634 seconds
k: 50, Silhouette Score: 0.7983751067184075, WCSS: 50776.23331974545, Time Taken: 12.305404663085938 seconds
k: 51, Silhouette Score: 0.791669989266832, WCSS: 51170.07619852613, Time Taken: 12.535984992980957 seconds
k: 52, Silhouette Score: 0.7976659288060297, WCSS: 45421.276155812055, Time Taken: 8.632468938827515 seconds
k: 53, Silhouette Score: 0.7989224828194511, WCSS: 44566.488136494816, Time Taken: 8.577142477035522 seconds
k: 54, Silhouette Score: 0.8293581326510951, WCSS: 42119.00802898271, Time Taken: 8.389490842819214 seconds
k: 55, Silhouette Score: 0.789892762163796, WCSS: 41991.56417328655, Time Taken: 7.896442174911499 seconds
k: 56, Silhouette Score: 0.8232711515625952, WCSS: 40491.63861324899, Time Taken: 7.8990044593811035 seconds
k: 57, Silhouette Score: 0.8008040468122573, WCSS: 40499.83601823361, Time Taken: 8.842151880264282 seconds
k: 58, Silhouette Score: 0.8118856034733605, WCSS: 39648.87586369499, Time Taken: 11.153058052062988 seconds
k: 59, Silhouette Score: 0.8237654276641893, WCSS: 36595.60467797983, Time Taken: 7.658572673797607 seconds
k: 60, Silhouette Score: 0.7695358885552273, WCSS: 37260.22008281755, Time Taken: 8.92990756034851 seconds
k: 61, Silhouette Score: 0.8236054588532493, WCSS: 33595.74126109876, Time Taken: 6.885226488113403 seconds
k: 62, Silhouette Score: 0.8287085422450893, WCSS: 33674.46643113743, Time Taken: 9.012094497680664 seconds
k: 63, Silhouette Score: 0.76297484583599, WCSS: 35843.76721882181, Time Taken: 7.951349496841431 seconds
k: 64, Silhouette Score: 0.8021408707097821, WCSS: 30941.980445092824, Time Taken: 9.699987888336182 seconds
k: 65, Silhouette Score: 0.7853608273727848, WCSS: 31649.967949477163, Time Taken: 8.849440574645996 seconds
k: 66, Silhouette Score: 0.7834194644326498, WCSS: 31115.104687594416, Time Taken: 8.718227624893188 seconds
k: 67, Silhouette Score: 0.7879665478399728, WCSS: 31607.411451211618, Time Taken: 9.10171127319336 seconds
k: 68, Silhouette Score: 0.7778732609058202, WCSS: 28836.54567989254, Time Taken: 11.061966896057129 seconds
k: 69, Silhouette Score: 0.8419497380913247, WCSS: 27511.94290020385, Time Taken: 11.135072469711304 seconds
k: 70, Silhouette Score: 0.8095064999875181, WCSS: 29686.801885118515, Time Taken: 10.736343145370483 seconds
k: 71, Silhouette Score: 0.7988862098544455, WCSS: 28315.54293937125, Time Taken: 10.754753351211548 seconds
k: 72, Silhouette Score: 0.811370345459618, WCSS: 27334.985435366365, Time Taken: 9.813760042190552 seconds
k: 73, Silhouette Score: 0.7812016940571781, WCSS: 25783.89865789666, Time Taken: 9.789621591567993 seconds
k: 74, Silhouette Score: 0.7706518260519104, WCSS: 28332.927369483383, Time Taken: 9.29653263092041 seconds
k: 75, Silhouette Score: 0.8020910123059354, WCSS: 27701.59025749798, Time Taken: 9.8978910446167 seconds
k: 76, Silhouette Score: 0.8021665858204504, WCSS: 27578.033873043816, Time Taken: 9.199559211730957 seconds
k: 77, Silhouette Score: 0.79936549542132, WCSS: 23995.97006583454, Time Taken: 11.353132247924805 seconds
k: 78, Silhouette Score: 0.7871390924737632, WCSS: 24333.514848260787, Time Taken: 12.008180141448975 seconds
k: 79, Silhouette Score: 0.7912961047457151, WCSS: 24040.755965282333, Time Taken: 8.763015031814575 seconds
k: 80, Silhouette Score: 0.8051453111991831, WCSS: 24745.50750979829, Time Taken: 12.011964082717896 seconds
Optimal k based on Silhouette Score: 69
Downloading artifacts:   0%|          | 0/15 [00:00<?, ?it/s]
Uploading artifacts:   0%|          | 0/4 [00:00<?, ?it/s]
Out[ ]:
DataFrame[unique_id: bigint, latitude: double, longitude: double, features: vector, prediction: int]
In [ ]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import silhouette_samples

# Extract data for plotting
k_values, silhouette_values, wcss_values, _ = zip(*metrics)

# Plot 1: Silhouette Scores
plt.figure(figsize=(12, 6))
plt.plot(k_values, silhouette_values, 'bo-')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Score vs. Number of Clusters')
plt.grid(True)
plt.show()

# Plot 2: Elbow Plot (WCSS)
plt.figure(figsize=(12, 6))
plt.plot(k_values, wcss_values, 'ro-')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Within-Cluster Sum of Squares (WCSS)')
plt.title('Elbow Method for Optimal k')
plt.grid(True)
plt.show()

# Plot 3: Clustered Data Points
plt.figure(figsize=(12, 10))  # Increased figure height to accommodate legend below

# Convert PySpark DataFrame to numpy arrays
predictions = result_df['prediction'].to_numpy()
longitudes = result_df['longitude'].to_numpy()
latitudes = result_df['latitude'].to_numpy()

# Get unique cluster labels and sort them
unique_labels = sorted(np.unique(predictions))

# Create a color map
colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_labels)))

# Plot each cluster
for label, color in zip(unique_labels, colors):
    mask = predictions == label
    plt.scatter(longitudes[mask], latitudes[mask], 
                c=[color], label=f'Cluster {label}', alpha=0.6)

plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title(f'K-Means Clustering Results (k={optimal_k})')

# Sort the legend labels
handles, labels = plt.gca().get_legend_handles_labels()
labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: int(t[0].split()[-1])))

# Place legend below the chart
plt.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, -0.05),
           fancybox=True, shadow=True, ncol=5)  # Adjust ncol as needed

plt.grid(True)
plt.tight_layout()  # Adjust the plot to ensure all elements are visible
plt.show()

# Plot 4: Silhouette Plot
plt.figure(figsize=(12, 8))

# Compute silhouette scores for each sample
silhouette_samples_values = silhouette_samples(np.column_stack((longitudes, latitudes)), predictions)

y_lower = 10
for i in range(optimal_k):
    # Aggregate the silhouette scores for samples belonging to cluster i
    ith_cluster_silhouette_values = silhouette_samples_values[predictions == i]
    ith_cluster_silhouette_values.sort()
    
    size_cluster_i = ith_cluster_silhouette_values.shape[0]
    y_upper = y_lower + size_cluster_i
    
    color = colors[i]
    plt.fill_betweenx(np.arange(y_lower, y_upper),
                      0, ith_cluster_silhouette_values,
                      facecolor=color, edgecolor=color, alpha=0.7)
    
    # Label the silhouette plots with their cluster numbers at the middle
    plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
    
    # Compute the new y_lower for next plot
    y_lower = y_upper + 10  # 10 for the 0 samples

plt.title("The silhouette plot for the various clusters.")
plt.xlabel("The silhouette coefficient values")
plt.ylabel("Cluster label")

# The vertical line for average silhouette score of all the values
plt.axvline(x=np.mean(silhouette_samples_values), color="red", linestyle="--")

plt.yticks([])  # Clear the yaxis labels / ticks
plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

plt.tight_layout()
plt.show()

# Optional: Save the plots
# plt.savefig('silhouette_scores.png')
# plt.savefig('elbow_plot.png')
# plt.savefig('clustering_results.png', bbox_inches='tight')  # Ensure legend is included when saving
# plt.savefig('silhouette_plot.png', bbox_inches='tight')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
import pyspark.pandas as ps

# Convert result_df to a pandas-on-Spark DataFrame
result_psdf = result_df

# Ensure result_psdf has only the necessary columns
result_psdf = result_psdf[['unique_id', 'prediction']]

# Merge the cluster assignments back to the original dataframe
# nonscaled_clustered_df = numeric_df.merge(
#     result_psdf,
#     on='unique_id',
#     how='left'
# )

# Merge the cluster assignments back to the original dataframe
clustered_df = final_psdf.merge(
    result_psdf,
    on='unique_id',
    how='left'
)

# Show the first few rows of the result
display(clustered_df.head())

# Get some basic statistics about the clusters
cluster_stats = clustered_df.groupby('prediction').agg({
    'unique_id': 'count',
    'latitude': ['mean', 'min', 'max'],
    'longitude': ['mean', 'min', 'max']
}).reset_index()

print("\nCluster Statistics:")
print(cluster_stats)

# Optional: Save the clustered dataframe
# clustered_df.to_csv('clustered_data.csv', index=False)
unique_idlatitudelongitudehas_Tennishas_Parkinghas_Alarmhas_Golfhas_TVhas_Clubhousehas_Playgroundhas_Refrigeratorhas_Cable_or_Satellitehas_Unknownhas_Gatedhas_Poolhas_Wood_Floorshas_Internet_Accesshas_Viewhas_Elevatorhas_Hot_Tubhas_Gymhas_Storagehas_Doormanhas_Dishwasherhas_Washer_Dryerhas_Patio/Deckhas_Garbage_Disposalhas_Luxuryhas_AChas_Fireplacehas_photo_nohas_photo_yespets_allowed_Yesbathroomsbedroomsscaled_square_feetweek_1week_2week_3week_4priceprediction
039.05-84.34390000000001000000000000000000111.011.996284872989418500101000.03
135.9165-78.91770000100000000000010000000010111.012.17447847743478701001008.058
241.4779-87.30580000000001000000000000000000111.011.854246492634414401001019.043
332.91-97.55720100000000010000010000000000011.011.872324104679596900101025.013
432.7767-97.08161100010000000000010001000000110.011.965294680911963100101040.013
Cluster Statistics:
   prediction unique_id   latitude                     longitude                    
                  count       mean      min      max        mean       min       max
0          31       964  35.458441  33.8994  36.4297  -97.438064  -99.3992  -95.7854
1          65       928  37.676690  36.5877  38.5392 -122.150870 -122.9650 -120.8502
2          53       123  36.288060  34.6530  37.3395 -119.723941 -120.9770 -118.9327
3          34      3739  40.752542  40.0711  41.9398  -74.146435  -75.5692  -73.0548
4          28       410  47.151638  44.3600  48.8467  -96.939593  -99.1233  -94.8577
5          27      1120  41.298523  40.6246  43.7274  -96.257838  -99.0865  -95.2202
6          26       679  26.400779  24.5645  27.0783  -81.821737  -82.3809  -81.5339
7          44      2562  47.587022  46.2858  48.7871 -122.186509 -123.0586 -119.2833
8          12       749  38.689381  37.6656  40.9288 -121.466670 -124.2265 -120.9456
9          22       602  35.415762  34.1847  37.0926  -82.970606  -84.3648  -81.8052
10         47       516  38.696588  37.3130  40.4592  -90.551110  -92.3696  -88.3730
11          1      4397  34.052055  33.7435  35.6241 -118.336423 -119.7428 -117.6882
12         52        75  47.082651  45.6661  47.9717 -115.906452 -118.9707 -114.0109
13         13      7679  32.857835  31.7894  33.8169  -96.794428  -98.1898  -94.6849
14         16      5782  42.398213  41.4764  44.8163  -71.161118  -72.0320  -68.7788
15          6       311  45.190544  42.1583  45.7941 -122.695243 -123.3734 -121.2030
16          3      2194  39.164440  37.6675  40.8297  -84.403614  -85.4000  -82.7679
17         20       291  27.682800  26.1596  28.8584  -97.444457  -99.5123  -97.0014
18         40        58  45.907878  42.5425  47.4759 -109.852955 -112.0096 -108.1832
19         57       201  39.033606  38.4093  39.2115  -96.035594  -96.8382  -95.6403
20         54      1215  33.931516  33.4810  34.8870 -117.158565 -117.6097 -115.7200
21         48       652  32.231140  31.3517  35.1565 -110.837331 -111.2942 -106.3886
22          5      6320  39.666363  35.1038  42.1244 -104.975425 -108.5479 -102.8964
23         19      1222  30.371511  29.5746  32.4200  -91.021634  -93.2763  -88.5209
24         64       350  32.811575  31.8744  34.1751  -80.279864  -82.1391  -79.4699
25         41       351  41.848046  41.0055  43.0597  -92.802444  -94.1803  -90.3928
26         15      1096  42.248487  40.8623  46.4805  -83.794953  -85.9328  -82.4822
27         43      1540  42.039469  40.0656  46.5500  -87.997466  -89.7165  -85.8527
28         37       808  40.710100  39.1083  43.6865 -111.842660 -114.0215 -108.6071
29         61       143  44.270130  42.9236  44.9449  -72.988917  -74.9555  -71.4988
30         17       734  40.472663  38.9239  42.1100  -80.015422  -80.7039  -78.4057
31          9        74  33.602236  31.8666  37.0439 -101.624292 -103.2484  -99.7024
32         35        83  43.167627  40.6194  43.6620 -116.283498 -116.9378 -113.2907
33          4      1326  28.494135  27.6361  29.2867  -81.498701  -82.5222  -80.4261
34         59       391  30.261091  29.4478  31.8486  -81.680929  -83.3267  -81.1211
35         55       267  36.522928  35.1878  37.4133  -93.850739  -94.7725  -92.5940
36          8       575  44.966720  43.7973  47.1165  -93.191948  -94.3728  -88.5631
37         23      2275  37.623038  36.4531  38.6310  -77.582103  -79.1370  -77.2433
38         39       676  30.341089  29.8796  31.7509  -97.744829  -98.9677  -96.9336
39         49       357  47.632492  44.0763  48.4169 -101.834662 -103.8239 -100.1875
40          7       731  35.223555  33.1550  37.1899  -86.597405  -87.6872  -85.0250
41         51       158  37.752265  37.5649  39.3614  -97.464472 -100.8498  -96.8648
42         63       712  39.102153  38.7603  39.7874  -94.563581  -94.9493  -93.5572
43         10       734  41.878467  40.7750  42.9367  -72.672399  -74.0287  -71.9117
44         50      2451  27.858158  27.0525  29.0234  -82.531026  -82.8268  -81.8186
45         45       441  38.935525  37.6408  39.3069  -94.787060  -95.4650  -94.6260
46         38       440  39.497655  38.8295  39.6582 -119.801676 -120.0351 -119.2317
47         25       887  26.222205  25.3801  27.5916  -80.235295  -80.5047  -80.0566
48         24      1711  33.567803  32.8931  35.2270 -111.998285 -112.5707 -111.3066
49         62      1647  40.007466  38.7258  41.4840  -74.940790  -75.9959  -74.1990
50         29      4832  33.853643  31.7036  35.1986  -84.353385  -86.1009  -83.1686
51         21      1560  36.399064  35.6176  37.9719  -79.866392  -81.5113  -78.9306
52         32      1518  41.346071  39.9628  41.9866  -81.591791  -82.8181  -80.3126
53         60       114  43.026267  41.1351  44.3024  -77.187233  -78.9607  -75.2143
54         56       163  30.875948  30.1607  32.8418  -86.018401  -88.2373  -84.0890
55         58      3226  35.788215  34.9674  36.7099  -78.683957  -79.3885  -77.3486
56         33      1958  35.154421  33.9006  35.9083  -80.898734  -81.9645  -79.7341
57         11      2408  36.165366  34.4752  38.4533 -115.172531 -116.2805 -112.8266
58         68       922  29.528844  28.9673  30.1051  -98.548845  -99.1548  -97.9657
59         14       696  33.831681  31.7872  35.4961  -92.950421  -94.6692  -90.8945
60         42      1274  40.012806  38.2313  41.1211  -82.960595  -84.0782  -81.4792
61          2      1560  29.894512  28.9520  31.7067  -95.455782  -96.3777  -93.7581
62         30       442  35.265488  33.4345  37.0376  -89.893783  -91.2012  -88.2511
63         66       249  34.271618  33.6285  35.0960  -77.921776  -79.4567  -76.7601
64         46      1612  33.769279  33.4383  34.1383 -117.928792 -118.1973 -117.5596
65         67       848  38.840626  37.2068  40.7901  -85.968491  -88.1017  -85.3842
66          0      2459  36.958129  36.0217  37.8946  -76.325611  -76.7583  -75.5594
67         18      8597  39.006453  38.1175  40.9381  -77.003070  -78.5107  -76.1512
68         36      1332  32.874022  32.5601  33.3694 -117.084980 -117.3526 -114.5020
In [ ]:
clustered_df.columns
Out[ ]:
Index(['unique_id', 'latitude', 'longitude', 'has_Tennis', 'has_Parking',
       'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
       'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
       'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
       'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
       'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
       'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
       'has_photo_no', 'has_photo_yes', 'pets_allowed_Yes', 'bathrooms',
       'bedrooms', 'scaled_square_feet', 'week_1', 'week_2', 'week_3',
       'week_4', 'price', 'prediction'],
      dtype='object')

VIF scores and Linear Regressions by Cluster¶

We created several functions that calculate the VIF score for each cluster. This isn't trivial because it requires checking for constant columns, removing highly correlated columns with VIF scores above 5 and looping this process until all the columns have a VIF score under 5.

We also calculated a Linear Regression for each cluster according to the VIF feature selection process. We save it all into a dataframe for viewing.

In [ ]:
# renaming the prediction column for readability 
clustered_df = clustered_df.rename(columns={'prediction': 'clusters'})
clustered_df.columns
Out[ ]:
Index(['unique_id', 'latitude', 'longitude', 'has_Tennis', 'has_Parking',
       'has_Alarm', 'has_Golf', 'has_TV', 'has_Clubhouse', 'has_Playground',
       'has_Refrigerator', 'has_Cable_or_Satellite', 'has_Unknown',
       'has_Gated', 'has_Pool', 'has_Wood_Floors', 'has_Internet_Access',
       'has_View', 'has_Elevator', 'has_Hot_Tub', 'has_Gym', 'has_Storage',
       'has_Doorman', 'has_Dishwasher', 'has_Washer_Dryer', 'has_Patio/Deck',
       'has_Garbage_Disposal', 'has_Luxury', 'has_AC', 'has_Fireplace',
       'has_photo_no', 'has_photo_yes', 'pets_allowed_Yes', 'bathrooms',
       'bedrooms', 'scaled_square_feet', 'week_1', 'week_2', 'week_3',
       'week_4', 'price', 'clusters'],
      dtype='object')

We had an issue where the pandas dataframe to output the results would not work when using pyspark dataframes. There was a conflict with worker nodes and driver.

In [ ]:
import pyspark.pandas as ps
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Initialize Spark session
spark = SparkSession.builder \
    .appName("VIF Calculation and Price Prediction") \
    .getOrCreate()

# Assuming clustered_df is a pandas-on-Spark DataFrame
cluster_col = 'clusters'
unique_id_col = 'unique_id'
price_col = 'price'
numeric_columns = [col for col in clustered_df.columns if col not in [unique_id_col, cluster_col, price_col]]

# Convert to Spark DataFrame for efficient processing
spark_df = clustered_df.to_spark()

def remove_constant_columns(df):
    return df.loc[:, df.nunique() != 1]

def calculate_vif(pdf, feature_cols):
    vif_data = []
    for col_name in feature_cols:
        y = pdf[col_name]
        X = pdf[[x for x in feature_cols if x != col_name]]
        lr = LinearRegression().fit(X, y)
        r2 = lr.score(X, y)
        vif = 1 / (1 - r2)
        vif_data.append((col_name, vif))
    return pd.DataFrame(vif_data, columns=['feature', 'VIF'])

def iterative_vif(pdf, feature_cols, threshold=5):
    while True:
        vif_df = calculate_vif(pdf, feature_cols)
        max_vif = vif_df['VIF'].max()
        if max_vif < threshold:
            break
        feature_to_remove = vif_df.loc[vif_df['VIF'].idxmax(), 'feature']
        feature_cols.remove(feature_to_remove)
        if len(feature_cols) < 2:
            break
    return vif_df, feature_cols

def predict_price(pdf, feature_cols):
    X = pdf[feature_cols]
    y = pdf[price_col]
    model = LinearRegression().fit(X, y)
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    r2 = r2_score(y, predictions)

    n = len(y)
    p = len(feature_cols)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

    return pd.DataFrame({'feature': feature_cols, 'coefficient': model.coef_}), model.intercept_, mse, r2, adjusted_r2

result_schema = StructType([
    StructField("feature", StringType(), True),
    StructField("VIF", DoubleType(), True),
    StructField("coefficient", DoubleType(), True),
    StructField("cluster", IntegerType(), True),
    StructField("intercept", DoubleType(), True),
    StructField("mse", DoubleType(), True),
    StructField("r2", DoubleType(), True),
    StructField("adjusted_r2", DoubleType(), True),
    StructField("row_count", IntegerType(), True)


])

def process_group(pdf):
    cluster_value = pdf[cluster_col].iloc[0]
    row_count = len(pdf)
    non_constant_df = remove_constant_columns(pdf[numeric_columns])
    feature_cols = non_constant_df.columns.tolist()
    
    if len(feature_cols) < 2:
        return pd.DataFrame(columns=['feature', 'VIF', 'coefficient', 'cluster', 'intercept', 'mse', 'r2', 'adjusted_r2', 'row_count'])
    
    vif_df, final_features = iterative_vif(non_constant_df, feature_cols)
    
    coef_df, intercept, mse, r2, adjusted_r2 = predict_price(pdf, final_features)
    
    result_df = vif_df[vif_df['feature'].isin(final_features)].merge(coef_df, on='feature', how='left')
    result_df['cluster'] = cluster_value
    result_df['intercept'] = intercept
    result_df['mse'] = mse
    result_df['r2'] = r2
    result_df['adjusted_r2'] = adjusted_r2
    result_df['row_count'] = row_count

    return result_df

# Apply the function to each group using groupBy and applyInPandas
results = spark_df.groupBy(cluster_col).applyInPandas(process_group, schema=result_schema)

# Convert to pandas for easier manipulation and display
results_pd = results.toPandas()

# Display results
print("\nResults for each cluster:")
for cluster in results_pd['cluster'].unique():
    cluster_results = results_pd[results_pd['cluster'] == cluster]
    print(f"\nCluster {cluster}:")
    print("Features, VIF, and Coefficients:")
    print(cluster_results[['feature', 'VIF', 'coefficient']])
    print(f"Intercept: {cluster_results['intercept'].iloc[0]}")
    print(f"Mean Squared Error: {cluster_results['mse'].iloc[0]}")
    print(f"R-squared: {cluster_results['r2'].iloc[0]}")
    print(f"Adjusted R-squared: {cluster_results['adjusted_r2'].iloc[0]}")
    print(f"Row count: {cluster_results['row_count'].iloc[0]}")

# Optional: Save to CSV
# results_pd.to_csv('vif_and_regression_results_by_cluster.csv', index=False)
Results for each cluster:

Cluster 1:
Features, VIF, and Coefficients:
                   feature       VIF  coefficient
0                 latitude  1.136220  -247.743354
1                longitude  1.136633 -1149.969492
2               has_Tennis  1.164859    26.878715
3              has_Parking  1.372739   -17.928283
4                has_Alarm  1.025505  -439.268167
5                   has_TV  1.143097   280.431279
6            has_Clubhouse  1.202413   -56.860778
7           has_Playground  1.146448  -128.586345
8         has_Refrigerator  1.665827    49.902413
9   has_Cable_or_Satellite  1.804974  -111.926752
10             has_Unknown  1.609796    26.719641
11               has_Gated  1.513861  -264.302005
12                has_Pool  1.555737   248.563655
13         has_Wood_Floors  1.169279  -142.578390
14     has_Internet_Access  1.568422   242.817799
15                has_View  1.069060   222.489605
16            has_Elevator  1.564472   351.235785
17             has_Hot_Tub  1.269900   -69.632048
18                 has_Gym  1.543306    21.028659
19             has_Storage  1.106800   -34.589728
20             has_Doorman  1.026988  1778.738531
21          has_Dishwasher  1.941071  -168.951483
22        has_Washer_Dryer  1.152290    17.489561
23          has_Patio/Deck  1.288443    -7.775956
24    has_Garbage_Disposal  1.130511  -296.488063
25              has_Luxury  1.009518   636.488718
26                  has_AC  1.657418  -168.877662
27           has_Fireplace  1.235341   -17.110677
28            has_photo_no  1.445649    63.012089
29           has_photo_yes  1.725025   -45.781483
30        pets_allowed_Yes  1.055916   298.006921
31               bathrooms  2.901135   776.553136
32                bedrooms  2.323574  -271.611015
33      scaled_square_feet  2.493659   986.087635
34                  week_1  1.062504  -131.932388
35                  week_2  1.295825  -708.919459
36                  week_4  1.345707  -371.165253
Intercept: -127855.00168877741
Mean Squared Error: 1849043.8334700346
R-squared: 0.6105100713030106
Adjusted R-squared: 0.6072040085909691
Row count: 4397

Cluster 12:
Features, VIF, and Coefficients:
                   feature       VIF  coefficient
37                latitude  1.955843  -290.052256
38               longitude  2.092010  -139.525689
39              has_Tennis  1.205851    43.476088
40             has_Parking  1.616851    31.052135
41               has_Alarm  1.640565    27.613751
42                  has_TV  1.581251   -32.496693
43           has_Clubhouse  1.857468    73.153362
44          has_Playground  1.506839    44.577856
45        has_Refrigerator  2.394159   -33.323752
46  has_Cable_or_Satellite  2.636075   -73.675993
47             has_Unknown  1.817353   120.887298
48               has_Gated  1.439450   -25.730953
49                has_Pool  1.572800    50.645735
50         has_Wood_Floors  1.151079   117.614863
51     has_Internet_Access  1.911567   -25.432871
52                has_View  1.120854   -14.514612
53            has_Elevator  1.160440   711.504754
54             has_Hot_Tub  1.822884   109.259779
55                 has_Gym  1.970108    51.145623
56             has_Storage  1.842916    -5.886668
57          has_Dishwasher  2.599478    24.212583
58        has_Washer_Dryer  1.996224   107.759973
59          has_Patio/Deck  2.148608   -78.014403
60    has_Garbage_Disposal  1.827727   -36.551376
61              has_Luxury  1.114419  -208.309708
62                  has_AC  1.974779   -89.554805
63           has_Fireplace  1.870510   -11.550000
64            has_photo_no  1.426984    38.180915
65           has_photo_yes  1.315826    11.031244
66        pets_allowed_Yes  1.162067    58.473299
67               bathrooms  2.541705   212.031868
68                bedrooms  2.675638   -79.790711
69      scaled_square_feet  3.394268   247.786612
70                  week_1  1.107752   137.858566
71                  week_2  1.097989  -320.335449
72                  week_3  1.259794   100.147942
Intercept: -5068.981980932094
Mean Squared Error: 62606.5545276049
R-squared: 0.6074576652344965
Adjusted R-squared: 0.5876100190946676
Row count: 749

Cluster 22:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
73                 latitude  2.746369   -64.685637
74                longitude  3.010876   -14.950513
75               has_Tennis  1.728615   -15.723408
76              has_Parking  1.694958    85.669375
77                   has_TV  1.117862   -71.401956
78            has_Clubhouse  2.319333     9.360804
79           has_Playground  2.237522   -77.718158
80         has_Refrigerator  3.671827    -3.846969
81   has_Cable_or_Satellite  3.195047    21.139152
82              has_Unknown  1.832445    43.981893
83                has_Gated  1.647874    61.841669
84                 has_Pool  2.291595    79.196003
85          has_Wood_Floors  1.496837    19.812707
86      has_Internet_Access  2.370342     2.358234
87                 has_View  1.428881   -15.219875
88             has_Elevator  1.452879   272.564138
89              has_Hot_Tub  1.675968   186.118612
90                  has_Gym  2.007903     1.110050
91              has_Storage  1.539032     7.999268
92           has_Dishwasher  3.282462   -61.868464
93         has_Washer_Dryer  2.002200   -13.891629
94           has_Patio/Deck  1.601004   -48.768162
95     has_Garbage_Disposal  2.184648  -123.367148
96                   has_AC  2.305410   -12.359943
97            has_Fireplace  1.684242    39.075864
98             has_photo_no  1.286692   120.650314
99            has_photo_yes  2.975544   129.981921
100        pets_allowed_Yes  1.216509   115.876091
101               bathrooms  2.807367    42.441056
102                bedrooms  3.252056    24.070947
103      scaled_square_feet  3.131939   174.920828
104                  week_1  1.166099   173.587776
105                  week_3  1.306310     9.654138
106                  week_4  2.934345   220.204099
Intercept: 1193.6600269323744
Mean Squared Error: 37715.09578971941
R-squared: 0.5788756843542795
Adjusted R-squared: 0.5536230798887514
Row count: 602

Cluster 26:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
107                latitude  3.088071 -2590.291689
108               longitude  3.767416 -4059.718771
109              has_Tennis  1.481238   686.097725
110             has_Parking  1.437362  -140.934793
111               has_Alarm  1.032300 -1454.094111
112                has_Golf  1.199589  1559.394386
113                  has_TV  1.273073   430.674765
114           has_Clubhouse  1.435624   -60.057968
115          has_Playground  1.269781  -656.544585
116        has_Refrigerator  4.653471    72.435765
117  has_Cable_or_Satellite  1.517556  -635.922424
118             has_Unknown  1.555993    56.744970
119               has_Gated  1.432860  -333.804514
120                has_Pool  1.694521   509.007485
121         has_Wood_Floors  1.236230  -636.120235
122     has_Internet_Access  2.110323    29.185230
123                has_View  1.238710   287.926141
124            has_Elevator  2.463927    74.493645
125             has_Hot_Tub  1.125042  -592.500676
126                 has_Gym  1.859398   138.434173
127             has_Storage  1.994534  -558.773836
128          has_Dishwasher  3.621599   -26.907471
129        has_Washer_Dryer  1.490012  -146.631208
130          has_Patio/Deck  1.964499   119.526444
131    has_Garbage_Disposal  1.093270  -802.984673
132              has_Luxury  1.067727   750.705291
133                  has_AC  2.346342    35.577996
134           has_Fireplace  1.119332  5795.457881
135            has_photo_no  1.235973  -346.858132
136           has_photo_yes  1.324119  -338.920415
137        pets_allowed_Yes  1.392810    34.918839
138               bathrooms  3.321753   -53.975797
139                bedrooms  3.049911   -40.964581
140      scaled_square_feet  3.666878   825.642313
141                  week_1  1.074234   652.750976
142                  week_2  1.148029 -1439.437462
143                  week_3  1.846748  -371.289173
Intercept: -263846.43893598486
Mean Squared Error: 2251678.5538327172
R-squared: 0.5093369571263023
Adjusted R-squared: 0.48101475340348354
Row count: 679

Cluster 27:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
144                latitude  1.434561   -18.337101
145               longitude  1.247024    31.541353
146              has_Tennis  1.463843   -13.994646
147             has_Parking  1.577460    57.106376
148                has_Golf  1.052277    -5.624343
149                  has_TV  1.403699   -47.228300
150           has_Clubhouse  2.030043    33.666593
151          has_Playground  1.250824   -65.830676
152        has_Refrigerator  3.046823    21.402016
153  has_Cable_or_Satellite  2.133555    46.345236
154             has_Unknown  2.041357    68.925699
155               has_Gated  1.263355   188.265968
156                has_Pool  2.278023    -9.992079
157         has_Wood_Floors  1.238778   -35.906637
158     has_Internet_Access  1.562636     3.386001
159                has_View  1.120892  -143.918867
160            has_Elevator  1.511279    78.647699
161             has_Hot_Tub  1.320686   144.985027
162                 has_Gym  1.944179   -24.140229
163             has_Storage  1.572828    14.920366
164          has_Dishwasher  2.941548   -35.006950
165        has_Washer_Dryer  1.694087    47.422258
166          has_Patio/Deck  1.762925    -6.175560
167    has_Garbage_Disposal  1.793853   -73.918466
168              has_Luxury  1.015439    59.459176
169                  has_AC  1.579443   -91.858526
170           has_Fireplace  1.673575    15.885561
171            has_photo_no  1.103473    -1.090400
172           has_photo_yes  1.313647    20.604818
173        pets_allowed_Yes  1.142320    -1.107586
174               bathrooms  2.458125   137.730999
175                bedrooms  2.061867   -33.644569
176      scaled_square_feet  2.660010   207.588477
177                  week_1  1.032163  -251.972031
178                  week_2  1.325146   -15.453483
179                  week_3  1.020268   132.144879
Intercept: 4044.5581003572042
Mean Squared Error: 26833.93966146942
R-squared: 0.6173742372465155
Adjusted R-squared: 0.6046553753267321
Row count: 1120

Cluster 28:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
180                latitude  1.425622    24.484437
181               longitude  1.282202   124.315036
182              has_Tennis  1.183380    -1.155128
183             has_Parking  1.725407   -37.553238
184                  has_TV  1.903738    48.711510
185           has_Clubhouse  1.349101   107.577374
186          has_Playground  1.264631   -96.232067
187        has_Refrigerator  3.523577   -61.328494
188  has_Cable_or_Satellite  4.250913   -34.319347
189             has_Unknown  2.394420   -17.439574
190               has_Gated  3.295068    -4.290532
191                has_Pool  3.080604  -167.343480
192         has_Wood_Floors  1.146711    16.874702
193     has_Internet_Access  1.497079    69.243398
194            has_Elevator  3.176906    55.556203
195             has_Hot_Tub  3.939811    40.333528
196                 has_Gym  1.993082    40.473537
197             has_Storage  1.716205    44.823308
198          has_Dishwasher  3.549254   -24.571060
199        has_Washer_Dryer  1.974629    72.465065
200          has_Patio/Deck  3.114274    -7.847409
201    has_Garbage_Disposal  1.427444   -28.976934
202              has_Luxury  1.066434   146.058467
203                  has_AC  2.981800   -17.665002
204           has_Fireplace  1.138094   -66.272520
205            has_photo_no  1.408960  -101.897266
206           has_photo_yes  1.351421   -63.223590
207        pets_allowed_Yes  1.298637    91.294944
208               bathrooms  3.633205   159.960339
209                bedrooms  2.588370    18.215804
210      scaled_square_feet  4.171135   185.561875
211                  week_2  1.202475  -145.302659
212                  week_3  1.264658  -437.549692
Intercept: 11039.86353498449
Mean Squared Error: 23716.220990500115
R-squared: 0.8393843166510828
Adjusted R-squared: 0.8252877274209917
Row count: 410

Cluster 31:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
213                latitude  2.249864   368.020986
214               longitude  1.994283  -169.001600
215              has_Tennis  2.576161    -7.958800
216             has_Parking  2.943782   150.163005
217               has_Alarm  1.086131   -84.731674
218                  has_TV  1.916234   -14.326476
219           has_Clubhouse  2.839885   -19.005652
220          has_Playground  1.692816   -64.938845
221        has_Refrigerator  2.322993    50.778589
222  has_Cable_or_Satellite  2.324485    13.637131
223             has_Unknown  3.934225   344.175038
224               has_Gated  2.457681   160.477971
225                has_Pool  4.945161   253.461813
226         has_Wood_Floors  2.710929   186.201573
227     has_Internet_Access  1.503650   -11.573857
228            has_Elevator  1.033379   -16.711637
229             has_Hot_Tub  1.759852  -297.664109
230                 has_Gym  4.528346   -44.271711
231             has_Storage  3.028234   193.432546
232          has_Dishwasher  2.639886    30.997638
233        has_Washer_Dryer  2.221956   235.076917
234          has_Patio/Deck  1.987966   -65.699808
235    has_Garbage_Disposal  1.983527  -135.533095
236              has_Luxury  1.051454   463.344151
237                  has_AC  1.547080  -184.501910
238           has_Fireplace  2.105521  -110.259339
239            has_photo_no  2.948581    76.879004
240           has_photo_yes  3.665159    41.194941
241        pets_allowed_Yes  1.413314    -1.249423
242               bathrooms  3.142061  -170.473769
243                bedrooms  3.919592    29.994665
244      scaled_square_feet  4.372655   257.346292
245                  week_2  1.299492    -8.517294
246                  week_4  2.534676   -47.734478
Intercept: -29325.66692899796
Mean Squared Error: 64063.53799535754
R-squared: 0.5097568224691791
Adjusted R-squared: 0.49181466096643645
Row count: 964

Cluster 34:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
247                latitude  1.152122    35.064193
248               longitude  1.366616  1257.968726
249              has_Tennis  1.821905   -61.223281
250             has_Parking  1.378773    27.438560
251               has_Alarm  1.166818   -77.778679
252                  has_TV  1.587315   125.879702
253           has_Clubhouse  1.422935  -223.079381
254          has_Playground  1.314426  -115.753163
255        has_Refrigerator  2.041495  -157.346325
256  has_Cable_or_Satellite  2.059411  -153.683733
257             has_Unknown  1.685773   135.702883
258               has_Gated  1.183067  -313.273162
259                has_Pool  2.035635     1.346806
260         has_Wood_Floors  1.347410   -48.579155
261     has_Internet_Access  1.661090   107.568112
262                has_View  1.092364   323.677881
263            has_Elevator  1.509434   -75.358716
264             has_Hot_Tub  1.360583     4.585270
265                 has_Gym  2.323241   237.121114
266             has_Storage  1.256634   -17.336234
267             has_Doorman  1.111054   822.118412
268          has_Dishwasher  2.277427   176.354908
269        has_Washer_Dryer  1.691046   -17.788446
270          has_Patio/Deck  1.276570   200.875856
271    has_Garbage_Disposal  1.440069    13.357643
272              has_Luxury  1.013006   235.957884
273                  has_AC  1.666563   -76.260814
274           has_Fireplace  1.470333   148.222755
275            has_photo_no  1.588695    49.438454
276           has_photo_yes  1.666215   -24.564357
277        pets_allowed_Yes  1.063745   226.100371
278               bathrooms  1.840616   678.809654
279                bedrooms  1.709395   118.617813
280      scaled_square_feet  1.147502    55.977877
281                  week_1  1.261274   787.484329
282                  week_2  1.168254   -41.833901
283                  week_4  1.165535   -67.326548
Intercept: 92559.24825598963
Mean Squared Error: 528643.6283043601
R-squared: 0.450272776159798
Adjusted R-squared: 0.4447769892692043
Row count: 3739

Cluster 44:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
284                latitude  1.201489   310.344190
285               longitude  1.208592   249.454993
286              has_Tennis  1.222968    74.492336
287             has_Parking  1.686367   -31.697034
288                has_Golf  1.030520   214.278831
289                  has_TV  1.263832   -12.166824
290           has_Clubhouse  1.695784    57.333091
291          has_Playground  1.633750  -125.774369
292        has_Refrigerator  2.652774   217.137095
293  has_Cable_or_Satellite  2.337762  -138.414110
294             has_Unknown  2.071198   -54.211757
295               has_Gated  1.265729  -119.624941
296                has_Pool  1.740372  -175.779693
297         has_Wood_Floors  1.179533   -24.719450
298     has_Internet_Access  1.568236    26.295377
299                has_View  1.222926   188.648361
300            has_Elevator  1.139927  -115.024189
301             has_Hot_Tub  1.436480   -49.009597
302                 has_Gym  1.840297    50.254818
303             has_Storage  1.496419   156.228641
304          has_Dishwasher  2.737684  -123.054841
305        has_Washer_Dryer  1.646259    -6.636130
306          has_Patio/Deck  1.935840   -13.171810
307    has_Garbage_Disposal  1.414632   -90.838754
308              has_Luxury  1.017971  -157.511953
309                  has_AC  1.363665   530.067121
310           has_Fireplace  1.596622   -77.760333
311            has_photo_no  1.626369   103.904109
312           has_photo_yes  1.943934    10.036071
313        pets_allowed_Yes  1.274873   131.645676
314               bathrooms  2.472577   186.952026
315                bedrooms  2.571469   -94.291524
316      scaled_square_feet  3.002829   376.649398
317                  week_1  1.047554  -230.806861
318                  week_2  1.047663  -284.818582
319                  week_4  2.007445    44.860292
Intercept: 16453.505092533036
Mean Squared Error: 150688.65937530802
R-squared: 0.5036166784693006
Adjusted R-squared: 0.4965395301227242
Row count: 2562

Cluster 47:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
320                latitude  1.173725   -87.225699
321               longitude  1.511964    67.603193
322              has_Tennis  1.694084   -67.797963
323             has_Parking  1.652125   116.437293
324               has_Alarm  1.054713   131.467717
325                  has_TV  1.188287   346.805640
326           has_Clubhouse  2.045167  -222.523875
327          has_Playground  1.582571    88.770916
328        has_Refrigerator  2.029751   -96.460080
329  has_Cable_or_Satellite  1.815089   -92.387808
330             has_Unknown  2.046175    30.886440
331               has_Gated  1.644510   147.987510
332                has_Pool  1.965689   133.436188
333         has_Wood_Floors  1.335434    10.428920
334     has_Internet_Access  1.761018    96.365457
335                has_View  1.324641   325.643102
336            has_Elevator  1.489237   228.982610
337             has_Hot_Tub  1.324958   107.838628
338                 has_Gym  1.833435    -6.480967
339             has_Storage  1.501372    48.356748
340             has_Doorman  1.089843  -154.602854
341          has_Dishwasher  2.046573   -50.550506
342        has_Washer_Dryer  1.455685   -48.532424
343          has_Patio/Deck  1.708304   -21.977611
344    has_Garbage_Disposal  2.144492   105.854561
345                  has_AC  1.721562   -93.890229
346           has_Fireplace  1.224764   -99.693080
347            has_photo_no  1.189097  -139.766268
348           has_photo_yes  2.587584    63.037352
349        pets_allowed_Yes  1.279487     7.651626
350               bathrooms  2.839888   149.655941
351                bedrooms  2.583258   -19.761643
352      scaled_square_feet  3.379087   259.710399
353                  week_3  1.307496     0.344846
Intercept: 9668.062412212703
Mean Squared Error: 85585.89904218251
R-squared: 0.6337659455703213
Adjusted R-squared: 0.6078782993112588
Row count: 516

Cluster 53:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
354                latitude  1.988396  -303.668476
355               longitude  1.785811  -610.592987
356              has_Tennis  2.491713   -88.706986
357             has_Parking  1.864728  -140.285203
358                  has_TV  1.969075  -384.537969
359           has_Clubhouse  2.855335   108.990774
360          has_Playground  2.054019   -30.843364
361        has_Refrigerator  2.451989   -79.086514
362  has_Cable_or_Satellite  2.959612   221.480343
363             has_Unknown  2.047498   364.932766
364               has_Gated  1.979445  -114.542060
365                has_Pool  2.516879     6.770874
366         has_Wood_Floors  1.342344   -31.666811
367     has_Internet_Access  1.801831  -184.182748
368             has_Hot_Tub  1.794718   441.122052
369                 has_Gym  2.154901   -34.070284
370             has_Storage  2.107997   209.432398
371          has_Dishwasher  3.518400   224.928337
372        has_Washer_Dryer  2.078196    -8.620650
373          has_Patio/Deck  2.058006  -136.772679
374    has_Garbage_Disposal  2.650698  -284.085952
375                  has_AC  2.542053  -180.109964
376           has_Fireplace  1.471547   126.835507
377            has_photo_no  1.435952  -181.352802
378           has_photo_yes  4.152377   132.548000
379        pets_allowed_Yes  1.710351   -81.269633
380               bathrooms  2.759455   151.746953
381                bedrooms  2.656601    70.548665
382                  week_3  2.046609    97.277592
383                  week_4  3.124502  -224.486252
Intercept: -61167.46591885238
Mean Squared Error: 74242.4002257599
R-squared: 0.6938263919705878
Adjusted R-squared: 0.5939871719609968
Row count: 123

Cluster 65:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
384                latitude  2.250119 -1683.199581
385               longitude  2.185719 -2002.130850
386              has_Tennis  1.349149   151.867735
387             has_Parking  1.598988    10.777584
388               has_Alarm  1.124352  7376.139515
389                  has_TV  2.005362  -118.129365
390           has_Clubhouse  1.562565    23.477113
391          has_Playground  1.456392   215.965049
392        has_Refrigerator  2.992003   195.176157
393  has_Cable_or_Satellite  1.680428   -70.390592
394             has_Unknown  2.203372   163.418644
395               has_Gated  1.280249  -149.245055
396                has_Pool  1.795889   -72.489192
397         has_Wood_Floors  1.196684  -272.050118
398     has_Internet_Access  1.711542    85.745903
399                has_View  1.088141   132.912074
400            has_Elevator  1.735218   241.397191
401             has_Hot_Tub  1.275935  -333.307705
402                 has_Gym  1.888777     2.585058
403             has_Storage  1.455292   136.128962
404          has_Dishwasher  2.771766   108.861689
405        has_Washer_Dryer  1.719897    15.506317
406          has_Patio/Deck  1.670875     7.458357
407    has_Garbage_Disposal  1.377741  -249.112248
408                  has_AC  1.684773   -96.833400
409           has_Fireplace  1.755189  -149.774451
410            has_photo_no  1.200381  -295.713235
411           has_photo_yes  2.285663   -93.118162
412        pets_allowed_Yes  1.557686   259.879881
413               bathrooms  3.256267   182.796803
414                bedrooms  2.599097     0.474360
415      scaled_square_feet  3.328509   581.533127
416                  week_1  1.092685  -409.847543
417                  week_3  1.088017   357.120561
Intercept: -180006.84738826434
Mean Squared Error: 592664.1799995424
R-squared: 0.5989894372154505
Adjusted R-squared: 0.58372139787091
Row count: 928

Cluster 3:
Features, VIF, and Coefficients:
                    feature       VIF  coefficient
418                latitude  2.306794  -185.993421
419               longitude  1.427660  -364.172196
420              has_Tennis  1.461539   -33.821252
421             has_Parking  1.640711    64.383381
422               has_Alarm  1.125003   107.323624
423                  has_TV  1.337371   -61.435621
424           has_Clubhouse  1.683501   -52.094889
425          has_Playground  1.286114  -125.273184
426        has_Refrigerator  2.422976    77.505215
427  has_Cable_or_Satellite  2.021441    75.825175
428             has_Unknown  1.995962    46.619032
429               has_Gated  1.277878   120.101271
430                has_Pool  2

*** WARNING: max output size exceeded, skipping output. ***

8645    56.972420
1930               bathrooms  3.011470    82.448247
1931                bedrooms  3.279199   -46.349264
1932      scaled_square_feet  3.173206   199.718868
1933                  week_1  1.007687  -141.724932
1934                  week_2  1.072396    20.112800
1935                  week_4  2.190593    -8.398322
Intercept: -172.67882695124513
Mean Squared Error: 38088.04746669037
R-squared: 0.4734549597793821
Adjusted R-squared: 0.4610087211398928
Row count: 1560

Cluster 30:
Features, VIF, and Coefficients:
                   feature       VIF  coefficient
1936              latitude  1.434548  -119.725832
1937             longitude  1.761341   -45.426298
1938            has_Tennis  1.865724   -79.817696
1939           has_Parking  1.700880    78.531074
1940             has_Alarm  1.354169    98.627273
1941                has_TV  1.718372    48.175926
1942         has_Clubhouse  2.096930    39.481177
1943        has_Playground  2.899749  -272.341227
1944      has_Refrigerator  4.628556   -68.908130
1945           has_Unknown  2.039074   -25.559077
1946             has_Gated  2.130883  -115.767498
1947              has_Pool  2.592135   -12.897698
1948       has_Wood_Floors  1.453908   -13.210499
1949   has_Internet_Access  2.177425    86.060680
1950              has_View  1.621438   215.024668
1951          has_Elevator  1.545671   181.025409
1952           has_Hot_Tub  1.208860  -339.199566
1953               has_Gym  2.972707   170.700642
1954           has_Storage  2.202129   142.952190
1955           has_Doorman  4.519825   814.833739
1956        has_Dishwasher  4.085272   143.477661
1957      has_Washer_Dryer  1.676665   -77.635153
1958        has_Patio/Deck  2.375977   -45.545854
1959  has_Garbage_Disposal  2.337646  -238.717300
1960            has_Luxury  1.051359   449.058538
1961                has_AC  2.157132  -114.413200
1962         has_Fireplace  1.782878   138.751078
1963          has_photo_no  1.300738  -145.215698
1964         has_photo_yes  1.994199   -75.707368
1965      pets_allowed_Yes  1.565147    59.757435
1966             bathrooms  3.298055   113.926688
1967              bedrooms  3.537323  -145.945105
1968    scaled_square_feet  3.254916   278.752334
1969                week_1  1.244012   -82.078739
1970                week_3  1.148181  -290.681247
1971                week_4  2.479113   -28.044157
Intercept: 531.8538397098578
Mean Squared Error: 42665.18494807305
R-squared: 0.7460699467049043
Adjusted R-squared: 0.7234983864120069
Row count: 442

Cluster 32:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
1972                latitude  1.339000   382.927082
1973               longitude  1.127576     5.484437
1974              has_Tennis  1.213700   -22.871709
1975             has_Parking  2.236835    48.415574
1976                has_Golf  1.114947   126.959837
1977                  has_TV  1.702269  -112.351138
1978           has_Clubhouse  2.556595    67.422478
1979          has_Playground  1.584285   -93.889444
1980        has_Refrigerator  1.977698  -181.189102
1981  has_Cable_or_Satellite  2.956775     8.292049
1982             has_Unknown  2.295884    61.937302
1983               has_Gated  1.260178   104.458775
1984                has_Pool  1.994733  -202.327825
1985         has_Wood_Floors  1.269502  -225.838207
1986     has_Internet_Access  2.540800  -168.053146
1987                has_View  1.238982   271.953934
1988            has_Elevator  1.126334  -314.353936
1989             has_Hot_Tub  1.286818    22.262386
1990                 has_Gym  1.701742   270.111426
1991             has_Storage  1.763043   223.119613
1992             has_Doorman  1.063766    94.237801
1993          has_Dishwasher  2.068223    84.871809
1994        has_Washer_Dryer  1.633566   -17.174185
1995          has_Patio/Deck  1.668328  -279.686605
1996    has_Garbage_Disposal  1.422668   335.853122
1997              has_Luxury  1.041415   174.818517
1998                  has_AC  1.463632  -170.748884
1999           has_Fireplace  1.667752    17.808902
2000            has_photo_no  1.436973   170.171147
2001           has_photo_yes  1.474117    83.246918
2002        pets_allowed_Yes  1.202965    94.073830
2003               bathrooms  2.029350   499.765685
2004                bedrooms  1.852952  -147.056910
2005      scaled_square_feet  1.411079   133.927949
2006                  week_2  1.179680   -52.238459
2007                  week_4  1.217799    91.995805
Intercept: -15134.758273455582
Mean Squared Error: 181533.82197684297
R-squared: 0.48531669751266215
Adjusted R-squared: 0.4728058272293778
Row count: 1518

Cluster 33:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2008                latitude  1.218155    46.638129
2009               longitude  1.130242    72.163760
2010              has_Tennis  1.310495   -47.914562
2011             has_Parking  1.586657   122.358706
2012               has_Alarm  1.730708   203.939909
2013                  has_TV  1.453578    99.235270
2014           has_Clubhouse  1.672613    26.759635
2015          has_Playground  1.517128   -68.017787
2016        has_Refrigerator  2.261984   -65.204078
2017  has_Cable_or_Satellite  2.708779   -28.995376
2018             has_Unknown  1.839255    20.067400
2019               has_Gated  1.352524    36.698861
2020                has_Pool  1.869718   -10.653561
2021         has_Wood_Floors  1.081476    81.933412
2022     has_Internet_Access  2.341217     0.529009
2023                has_View  1.018347    43.744015
2024            has_Elevator  1.126737   207.376969
2025             has_Hot_Tub  1.113388   -23.998522
2026                 has_Gym  1.873510    29.521275
2027             has_Storage  1.230639    25.752947
2028        has_Washer_Dryer  1.795317   -22.508226
2029          has_Patio/Deck  1.562762   -71.178821
2030    has_Garbage_Disposal  2.286279     6.793035
2031              has_Luxury  1.014664    -0.274277
2032                  has_AC  2.709267   -84.374999
2033           has_Fireplace  1.620906   -19.515970
2034            has_photo_no  1.235139   125.566542
2035           has_photo_yes  2.753403     7.423620
2036        pets_allowed_Yes  1.027469    10.012207
2037               bathrooms  3.286275    90.047465
2038                bedrooms  4.341075   -35.102902
2039      scaled_square_feet  3.655762   186.707214
2040                  week_1  2.481062   -93.406071
2041                  week_2  2.581541  -140.206878
2042                  week_3  1.292869  -176.340832
Intercept: 4876.42767092716
Mean Squared Error: 61121.11366852651
R-squared: 0.518020497193971
Adjusted R-squared: 0.5092435551553597
Row count: 1958

Cluster 36:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2043                latitude  1.214092  -339.466340
2044               longitude  1.225386  -623.500724
2045              has_Tennis  1.213728   -85.389412
2046             has_Parking  1.475408   147.719157
2047               has_Alarm  1.087682   -50.415679
2048                  has_TV  1.343998    10.059676
2049           has_Clubhouse  1.495789    24.129918
2050          has_Playground  1.210256  -162.914394
2051        has_Refrigerator  1.772236  -253.421600
2052  has_Cable_or_Satellite  1.801240   -81.064487
2053             has_Unknown  1.976963   181.223368
2054               has_Gated  1.239363   -62.198667
2055                has_Pool  1.900650   -45.134351
2056         has_Wood_Floors  1.114010    47.351885
2057     has_Internet_Access  1.461102   -36.674935
2058                has_View  1.080608   412.426908
2059            has_Elevator  1.211370   107.789522
2060             has_Hot_Tub  1.629382    48.811166
2061                 has_Gym  1.752185    12.324874
2062             has_Storage  1.201905   -36.006885
2063          has_Dishwasher  1.957131    27.780426
2064        has_Washer_Dryer  1.506079    80.243971
2065          has_Patio/Deck  1.448740     7.002230
2066    has_Garbage_Disposal  1.229933   -52.563745
2067              has_Luxury  1.051873   278.178695
2068                  has_AC  1.567322   -64.543497
2069           has_Fireplace  1.431528   -30.627746
2070            has_photo_no  1.187982   204.496758
2071           has_photo_yes  1.950547   110.375156
2072        pets_allowed_Yes  1.145488   149.715384
2073               bathrooms  2.920641   161.996648
2074                bedrooms  2.813122   -38.677423
2075      scaled_square_feet  3.090298   508.076729
2076                  week_2  2.231954  -173.327902
2077                  week_3  1.248974   179.142120
2078                  week_4  3.152810   215.045093
Intercept: -61367.340321044416
Mean Squared Error: 226826.97421245003
R-squared: 0.6403385352002524
Adjusted R-squared: 0.6303402242096803
Row count: 1332

Cluster 42:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2079                latitude  1.457918   -20.258362
2080               longitude  1.525291  -180.108841
2081              has_Tennis  1.326384   -66.157071
2082             has_Parking  1.405809    74.382883
2083               has_Alarm  1.253712   204.363912
2084                  has_TV  1.106727    65.708483
2085           has_Clubhouse  1.642217   -53.235173
2086          has_Playground  1.407410   -74.774203
2087        has_Refrigerator  2.088436   -38.711318
2088  has_Cable_or_Satellite  1.624676    23.414526
2089             has_Unknown  1.694421    73.377212
2090               has_Gated  1.278913    25.873373
2091                has_Pool  2.232507   -22.638377
2092         has_Wood_Floors  1.298938    -3.895335
2093     has_Internet_Access  1.569960    17.791374
2094                has_View  1.379836    80.424282
2095            has_Elevator  1.215892    96.178942
2096             has_Hot_Tub  1.266222  -136.038511
2097                 has_Gym  1.948828    35.353426
2098             has_Storage  1.266615    -0.375985
2099          has_Dishwasher  2.570369   -27.710983
2100        has_Washer_Dryer  1.662488   -23.266366
2101          has_Patio/Deck  1.462317    81.203264
2102    has_Garbage_Disposal  1.284778   129.664837
2103                  has_AC  1.897240   -49.768315
2104           has_Fireplace  1.253740    91.652271
2105            has_photo_no  1.198865    20.487481
2106           has_photo_yes  1.272205     0.377905
2107        pets_allowed_Yes  1.272911  -196.226748
2108               bathrooms  2.304167   106.219177
2109                bedrooms  2.343638   -11.526678
2110      scaled_square_feet  2.695699   246.209419
2111                  week_1  1.187871   120.794066
2112                  week_2  1.083123   113.018405
2113                  week_3  1.344681  -294.254762
Intercept: -13653.899972260646
Mean Squared Error: 69144.51026126942
R-squared: 0.5862816333736157
Adjusted R-squared: 0.5745852336709312
Row count: 1274

Cluster 46:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2114                latitude  1.875284  -911.373570
2115               longitude  2.601867  -415.074850
2116              has_Tennis  1.339433    33.488034
2117             has_Parking  1.504466   100.307039
2118               has_Alarm  1.173955   176.716016
2119                  has_TV  1.174047   185.907836
2120           has_Clubhouse  1.395971   -57.255588
2121          has_Playground  1.343494   -70.325367
2122        has_Refrigerator  1.437462    38.486647
2123  has_Cable_or_Satellite  1.526662   -66.246308
2124             has_Unknown  2.010487    76.214066
2125               has_Gated  1.309189   -72.380834
2126                has_Pool  1.714395    41.939706
2127         has_Wood_Floors  1.223167     4.064756
2128     has_Internet_Access  1.556862   138.708130
2129                has_View  1.085598   117.524758
2130            has_Elevator  1.279256    84.941620
2131             has_Hot_Tub  1.677418  -180.012837
2132                 has_Gym  1.753432    37.093332
2133             has_Storage  1.261044   100.493986
2134          has_Dishwasher  1.704314   -44.476601
2135        has_Washer_Dryer  1.278378    55.556082
2136          has_Patio/Deck  1.604458    48.512500
2137    has_Garbage_Disposal  1.084447  -132.334307
2138              has_Luxury  1.020276    29.154011
2139                  has_AC  1.505663   -52.444117
2140           has_Fireplace  1.280096   -57.924276
2141            has_photo_no  1.287154   -26.721409
2142           has_photo_yes  1.999875   -36.047594
2143        pets_allowed_Yes  1.150624   188.721565
2144               bathrooms  3.234787    83.644608
2145                bedrooms  2.751781    23.598758
2146      scaled_square_feet  3.474861   510.576341
2147                  week_1  2.904790    67.404144
2148                  week_2  1.764474    56.973074
2149                  week_4  3.740246   168.268367
Intercept: -17673.73363666568
Mean Squared Error: 139154.96367493764
R-squared: 0.7423420510247722
Adjusted R-squared: 0.7364527264767671
Row count: 1612

Cluster 56:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2150                latitude  2.170028  -100.344766
2151               longitude  2.117584    12.615448
2152              has_Tennis  1.459074    82.020004
2153             has_Parking  1.661409   -27.101088
2154                  has_TV  3.566842    47.118351
2155           has_Clubhouse  1.646532   -14.391388
2156          has_Playground  1.936110  -243.922534
2157        has_Refrigerator  2.667911   -27.102534
2158  has_Cable_or_Satellite  2.816522     6.206294
2159             has_Unknown  2.510651   138.223435
2160               has_Gated  1.911457   -26.082147
2161                has_Pool  1.997424    66.041855
2162         has_Wood_Floors  1.384205  -186.540307
2163     has_Internet_Access  1.864278    -8.043419
2164                has_View  1.695226     9.362647
2165            has_Elevator  1.432498   -34.011724
2166             has_Hot_Tub  1.658091   -63.152912
2167                 has_Gym  2.006888    51.372737
2168             has_Storage  2.173580    24.476048
2169        has_Washer_Dryer  2.154768     1.400483
2170          has_Patio/Deck  1.876410    10.653189
2171    has_Garbage_Disposal  2.499278   -70.705429
2172              has_Luxury  1.089022     8.340499
2173                  has_AC  1.994308   -46.356439
2174           has_Fireplace  1.757130    53.306315
2175            has_photo_no  1.498413  -183.427457
2176        pets_allowed_Yes  1.577083    50.287594
2177               bathrooms  2.725094   130.244539
2178      scaled_square_feet  2.642961   153.419147
2179                  week_2  2.692326   -74.824078
2180                  week_3  2.019251   -51.861525
2181                  week_4  2.855917    14.876479
Intercept: 4477.672591450521
Mean Squared Error: 49859.67990702523
R-squared: 0.5711007299589995
Adjusted R-squared: 0.4655255250258301
Row count: 163

Cluster 58:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2182                latitude  1.421524   218.258813
2183               longitude  1.185976  -129.897425
2184              has_Tennis  1.285034   -78.741518
2185             has_Parking  1.454515    38.514787
2186               has_Alarm  1.315591    30.456136
2187                  has_TV  1.172729    25.608230
2188           has_Clubhouse  1.611258   -40.897408
2189          has_Playground  1.498749   -55.171307
2190        has_Refrigerator  2.931232    29.632254
2191  has_Cable_or_Satellite  1.989209   -36.464235
2192             has_Unknown  1.723190   165.403510
2193               has_Gated  1.366783    79.276640
2194                has_Pool  1.867676    99.273047
2195         has_Wood_Floors  1.109303    42.659168
2196     has_Internet_Access  1.470281   -34.497868
2197            has_Elevator  1.268488   210.772445
2198             has_Hot_Tub  1.199878   -75.294908
2199                 has_Gym  1.853778     1.576868
2200             has_Storage  1.397158    -9.369576
2201             has_Doorman  1.088001   136.955221
2202          has_Dishwasher  2.918221   -14.541034
2203        has_Washer_Dryer  1.575451   -61.855681
2204          has_Patio/Deck  1.566517    -5.095606
2205    has_Garbage_Disposal  2.007047   116.636134
2206              has_Luxury  1.021769   176.441407
2207                  has_AC  1.968898   -66.277140
2208           has_Fireplace  1.431181     1.222987
2209            has_photo_no  1.264585    62.229652
2210           has_photo_yes  1.368267    36.836267
2211        pets_allowed_Yes  1.107388   263.676441
2212               bathrooms  3.032987   138.483815
2213                bedrooms  3.191892   -26.681383
2214      scaled_square_feet  3.102206   170.543747
2215                  week_1  1.082545    39.998342
2216                  week_2  1.208090    -5.651413
2217                  week_4  1.551812   -63.026142
Intercept: -17796.755570972648
Mean Squared Error: 75324.15948242611
R-squared: 0.39623623909605477
Adjusted R-squared: 0.3894204675712689
Row count: 3226

Cluster 60:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2218                latitude  2.123009  -115.781409
2219               longitude  2.100082   -16.119602
2220              has_Tennis  2.253828  -177.406174
2221             has_Parking  2.752087   -45.563338
2222                  has_TV  3.895313   272.934468
2223           has_Clubhouse  1.799769   202.887031
2224          has_Playground  2.079895  -280.350030
2225        has_Refrigerator  3.056784   -18.297041
2226  has_Cable_or_Satellite  3.465682  -213.695628
2227             has_Unknown  2.531149    98.589712
2228               has_Gated  2.042325   434.855325
2229                has_Pool  3.825845  -119.647004
2230         has_Wood_Floors  1.777726   -18.674486
2231     has_Internet_Access  3.819287   336.127270
2232                has_View  1.743311  -305.357724
2233            has_Elevator  1.980473   218.060947
2234                 has_Gym  4.963606   518.368322
2235             has_Storage  2.488823    87.709396
2236          has_Dishwasher  2.798890    56.055390
2237        has_Washer_Dryer  2.552049   -22.001300
2238          has_Patio/Deck  2.359162  -134.112805
2239    has_Garbage_Disposal  1.867332    -6.170532
2240                  has_AC  2.937379  -110.055722
2241           has_Fireplace  1.663258    52.604120
2242        pets_allowed_Yes  2.132433   -29.710472
2243               bathrooms  2.507183   265.254484
2244                bedrooms  3.421376   -82.106029
2245      scaled_square_feet  3.533567   192.024207
2246                  week_3  2.545025   -51.364393
2247                  week_4  2.799710   -41.664168
Intercept: 4225.568148349634
Mean Squared Error: 68501.1109531393
R-squared: 0.6892352596100243
Adjusted R-squared: 0.5769106546497922
Row count: 114

Cluster 66:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2248                latitude  3.409998  -421.607881
2249              has_Tennis  1.846251     8.821250
2250             has_Parking  2.436991     4.676629
2251               has_Alarm  1.498140   175.874248
2252                  has_TV  1.696247   -62.137384
2253           has_Clubhouse  2.117571   -72.644932
2254          has_Playground  2.667748   -49.129911
2255        has_Refrigerator  1.774147   327.997043
2256  has_Cable_or_Satellite  2.153676  -118.811057
2257             has_Unknown  1.685202   377.455968
2258               has_Gated  1.349734   273.248556
2259                has_Pool  3.717100     5.222778
2260         has_Wood_Floors  1.343271   177.465321
2261     has_Internet_Access  2.467205   -12.459440
2262                has_View  1.433024    85.722113
2263            has_Elevator  1.408123   157.979621
2264                 has_Gym  3.048268   145.643237
2265             has_Storage  2.340917    -0.895743
2266          has_Dishwasher  2.391963  -251.112716
2267        has_Washer_Dryer  1.776720   -72.858125
2268          has_Patio/Deck  1.894858   168.984759
2269    has_Garbage_Disposal  3.357753     6.634720
2270                  has_AC  2.979578  -105.251496
2271           has_Fireplace  2.443905   -57.352369
2272            has_photo_no  1.221906   259.146236
2273           has_photo_yes  1.634173   -31.125308
2274        pets_allowed_Yes  1.233293   354.643471
2275               bathrooms  3.649868   368.093184
2276                bedrooms  3.252507    23.552735
2277                  week_2  4.646306  -252.353027
2278                  week_3  1.902023   706.908943
2279                  week_4  3.085498    22.233817
Intercept: 14489.843942944506
Mean Squared Error: 44403.39239160985
R-squared: 0.7240033256743393
Adjusted R-squared: 0.6831149294779452
Row count: 249

Cluster 67:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2280                latitude  2.067224    62.160826
2281               longitude  2.226709   -81.597665
2282              has_Tennis  1.382601     6.724395
2283             has_Parking  1.512593    86.576961
2284               has_Alarm  1.186014  -262.051074
2285                has_Golf  1.057580   247.587733
2286                  has_TV  1.363565   104.802035
2287           has_Clubhouse  1.992063   120.663923
2288          has_Playground  1.611179  -128.639221
2289        has_Refrigerator  3.490503  -253.809052
2290  has_Cable_or_Satellite  2.127730    22.536840
2291             has_Unknown  1.822998    96.490416
2292               has_Gated  1.252106   -93.908765
2293                has_Pool  2.321319   -24.411215
2294         has_Wood_Floors  1.508811    50.377360
2295     has_Internet_Access  1.889853    -0.870209
2296                has_View  1.748370   -72.297914
2297            has_Elevator  1.234836   230.322871
2298             has_Hot_Tub  1.109766    91.350671
2299                 has_Gym  2.141972    22.740849
2300             has_Storage  1.420197    29.779317
2301             has_Doorman  1.716865   -90.572396
2302          has_Dishwasher  3.145248    12.687062
2303        has_Washer_Dryer  1.517581   -13.131889
2304          has_Patio/Deck  1.756549   -52.154594
2305    has_Garbage_Disposal  2.002010    60.527475
2306              has_Luxury  1.021128    56.129693
2307                  has_AC  1.519422    -1.028575
2308           has_Fireplace  1.738392  -109.222052
2309            has_photo_no  1.169499   300.463899
2310           has_photo_yes  2.029887    51.987184
2311        pets_allowed_Yes  1.381728  -232.292373
2312               bathrooms  2.602915   297.036227
2313                bedrooms  2.606112    48.756397
2314      scaled_square_feet  2.809306   156.389040
2315                  week_1  1.209775    82.907275
2316                  week_2  3.107711  -250.940979
2317                  week_3  1.104171   432.438139
Intercept: -9060.405647152316
Mean Squared Error: 117856.24199536812
R-squared: 0.5790327827483543
Adjusted R-squared: 0.5592592917031596
Row count: 848

Cluster 68:
Features, VIF, and Coefficients:
                     feature       VIF  coefficient
2318                latitude  1.875840   490.302215
2319               longitude  1.720388  -124.443444
2320              has_Tennis  1.438158  -106.139867
2321             has_Parking  1.507178    44.710132
2322                  has_TV  1.489304    93.191153
2323           has_Clubhouse  1.548149   -67.775147
2324          has_Playground  1.253032   -83.305832
2325        has_Refrigerator  2.463793   -60.584823
2326  has_Cable_or_Satellite  2.486560    69.507581
2327             has_Unknown  1.862351   -14.065256
2328               has_Gated  1.605061    21.739782
2329                has_Pool  2.220967    57.665546
2330         has_Wood_Floors  1.321396    -2.731416
2331     has_Internet_Access  1.842383   -10.828681
2332            has_Elevator  1.084526   268.663465
2333             has_Hot_Tub  1.366308   -25.098207
2334                 has_Gym  2.020393    36.864115
2335             has_Storage  1.532423    -5.082540
2336          has_Dishwasher  3.181736   -52.622451
2337        has_Washer_Dryer  1.844509    36.961804
2338          has_Patio/Deck  1.962687   -32.942442
2339    has_Garbage_Disposal  1.689271  -139.072038
2340              has_Luxury  1.024418  -202.730014
2341                  has_AC  2.677739   -99.920282
2342           has_Fireplace  1.524547   -35.187990
2343            has_photo_no  1.175013  -726.969527
2344        pets_allowed_Yes  2.865601   -43.225076
2345               bathrooms  3.805906   139.480935
2346                bedrooms  4.809382   -20.839717
2347      scaled_square_feet  4.339836   197.901698
2348                  week_1  1.047782    53.770578
2349                  week_3  1.281547    65.490925
2350                  week_4  3.513481    71.518711
Intercept: -26254.838745430534
Mean Squared Error: 41348.97963689274
R-squared: 0.6466615990846267
Adjusted R-squared: 0.6335307801316905
Row count: 922
In [ ]:
# Assuming results_pd is your pandas DataFrame with the results

# Get VIF results
vif_results = results_pd[['cluster', 'feature', 'VIF']]

# Pivot the results to get columns as features and rows as clusters
vif_pivot = vif_results.pivot(index='cluster', columns='feature', values='VIF')

# Fill NaN values with 'Constant' to indicate constant columns in that cluster
vif_pivot = vif_pivot.fillna('Constant')

# Sort columns alphabetically
vif_pivot = vif_pivot.reindex(sorted(vif_pivot.columns), axis=1)

# Print the results
print("\nVIF values for each feature by cluster:")
display(vif_pivot)

# Optional: Save to CSV
# vif_pivot.to_csv('vif_results_by_cluster.csv')

print("\nNote: 'Constant' indicates that the feature was constant within that cluster.")
VIF values for each feature by cluster:
/databricks/spark/python/pyspark/sql/pandas/conversion.py:413: UserWarning: createDataFrame attempted Arrow optimization because 'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, failed by the reason below:
  Could not convert 'Constant' with type str: tried to convert to double
Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.
  warn(msg)
bathroomsbedroomshas_AChas_Alarmhas_Cable_or_Satellitehas_Clubhousehas_Dishwasherhas_Doormanhas_Elevatorhas_Fireplacehas_Garbage_Disposalhas_Gatedhas_Golfhas_Gymhas_Hot_Tubhas_Internet_Accesshas_Luxuryhas_Parkinghas_Patio/Deckhas_Playgroundhas_Poolhas_Refrigeratorhas_Storagehas_TVhas_Tennishas_Unknownhas_Viewhas_Washer_Dryerhas_Wood_Floorshas_photo_nohas_photo_yeslatitudelongitudepets_allowed_Yesscaled_square_feetweek_1week_2week_3week_4
2.43133664691114462.389080591495331.57831125442520381.36756805137799222.14093300917994031.57586627189254052.5286587342995803Constant1.37851783077402821.42344382703668761.5518559813672391.1870914690336536Constant1.92809683404488361.13430349132314961.9095971790757571.00821731264058711.4478000847645591.57606129686870361.6979172426082781.97991573281266821.72457560619010391.43538571171719841.7198223456666381.38730752978682651.51990188655391821.18738044662116371.60534752853191631.1825792443306411.15297395404350221.24618576675365821.58612966585824581.55180413323397161.05136974346648152.7946959402745641.03152849893052221.0252404003413278Constant1.2193279673108346
2.9011351679908772.32357397990304731.65741844151425631.02550474190956861.80497367093654451.20241252339934861.94107051621886331.0269884501225881.56447171026367941.23534068711346451.13051057884961861.5138614412617817Constant1.54330607766142511.26990008604219271.56842249851762071.00951778932669271.37273903894885631.28844261812129961.14644791567976361.55573705057253431.66582664046579091.1067999056530731.1430969025392681.16485947728698221.60979628776686831.06905979966552221.15229028936977971.16927947526800781.44564883917161651.72502518387555041.1362197108493271.13663270040065471.05591628074268182.49365921745524941.06250384728907091.2958253472865175Constant1.34570662714226
3.16134339395279353.79046706077449921.60505033047663641.02510717858409351.69437231887409961.22325088306413334.192245190310863Constant1.11544442419898581.31213286561570121.87435088381029671.1309613066092171.02982000019697771.6129204374641911.06154356650730591.489116959440945Constant1.3115811048097471.53096565937110651.15316441196940641.44417328404248283.38857458990861331.27558621071267161.12228567866632441.17357149363968841.43302629319013741.01958803627602481.53409901472200551.07481689446575261.1058116899324468Constant1.40403500813681471.30606868589551331.1394994311723583.267460709409809Constant1.31873600241548681.39316828202589681.973192352823795
2.20646700743303462.33943178248961961.50984819679799381.12500261042433822.0214408790376261.68350052445936172.453484150258959Constant1.08209544985557681.52993687904379151.28681416239423291.2778777989376222Constant2.194679705358031.11877731605771441.67006542875466861.02399988123771271.64071122462825581.39610658883402541.28611434022128312.166634768808112.4229756362952531.37501087718080871.3373708875555021.4615393352144161.99596232231811381.02525424723222281.46739351652919671.17493099761682871.35378272116523331.60572561417058222.30679359149586951.4276595541116051.13405174829165482.8887894739582621.0109552323775441.04083212885838Constant2.569481524622883
2.93807213859216043.57155260946347572.1190349089359951.15384047062811731.96460623586417781.76427855399021882.8704044182737471.12101855728184981.07823377936377461.53559557050617061.4296307112054071.6241701061981253Constant2.16304356796577531.2294234438587261.61339660223451141.02624583527640741.48196981957755461.407323429988911.79316683705711.77505548230644352.7001278696778131.42597791263482091.47838186808328791.561959694004041.63803845426591121.07149547401445071.76016245626488521.54714735155055341.47940229526560232.09287351808071031.8700032917364832.3493854864205541.20756410653864023.677211708245454Constant1.12123245983698252.83533745729834763.6509224225245474
2.76058097581409452.22663277775356331.46168124931904591.03611092661594651.55194936877212691.44245561194040152.3745177742011841.02430499969542921.08113351949384521.54005204545513391.19102358042339551.09487084082451561.0111557908563072.0307977227682611.37421594269902971.2631757446741061.0154722061341621.40546500818633621.2385313160982871.2068041483327721.67138515978115692.24422473386607631.19242837098757311.15183173032324531.10690289755637931.68437936390973441.12427849348015841.44685769892820871.06688786269942741.36450164538662261.4858188220977291.10223532801287361.08339164144252661.05730277841702812.64558355546698731.02081889151551541.0443668669744837Constant1.2178110725017453
3.59213075402389453.32611637043662261.9187184873055756Constant4.2164835385009983.23614506088306184.9529156773674385Constant3.0845608716890061.81754059457025983.3629728220870962.02243748781530551.24106374431267772.69980099877161632.38517862186016674.097602264873229Constant2.8757552506742382.76916253017922731.68965350660678733.5812443618600384.4985824972655862.35850859934065141.18141977996310141.0812441782789586Constant1.51048687612347422.06582067410235261.33465610338997951.14357753333245922.6055125199291931.4732850886871711.28860259580923771.20179395648365224.726370572631432Constant1.1082214638978461.2320210853719575Constant
2.93176052923284263.63271516937528731.71063753554981471.15226688742019472.02274260406312361.57267995524292983.2038214322436915Constant1.32474652594350741.66041369974546171.43412602820776411.3765079351890108Constant2.08725416084042031.19631707762701781.6802712884902746Constant1.55531394266102561.50454088206231981.32567953368859451.76938142387223873.07058618128968461.5085237611907981.32618099874869191.4649147091319082.0074308608400511.06352533147370991.71960785914211711.2902919761535711.1018628211446773Constant1.17240519459581071.14801604651882271.1969275304214333.21102068335645631.106098250379496Constant1.06434748904589931.6529288049239312
2.17015192661520341.79965098428543361.45064444707201811.1299973801964862.43906902780798251.42768185856622172.6698062039919006Constant1.5930370515024911.23231384378923762.0811602747489971.1724990404160161.18595493561490441.72423886764215831.20372310776567071.97314737588789921.01591492196922231.75270788142466371.71363283895184471.2326429262905371.74782723749647343.5657243855273011.29448193863565451.18391873976333621.18503715821966822.5972912607315541.2247327955354691.27400133043695091.21982078012855451.0556595176013643Constant1.14290316110608671.12819099012201151.39179583247355242.27526565590782551.2319267485900156Constant1.716033516114572.6195894022600448
3.1682140771791893.87942352732239653.6511474700604887ConstantConstant2.3486553720401857ConstantConstant1.32574566833938532.331685833577053.19978529120327872.430540852671346ConstantConstant2.0617667569376283.905078710494897Constant4.6767811848448023.7642773834256891.5470959505882775ConstantConstant2.544627539442361Constant3.89450507604983682.928775268745342Constant2.4631045314245064.967435665457515ConstantConstant2.3389263241402151.78762336969627471.6543720952705445Constant1.74067255888845571.28844889696139591.7467798754442847Constant
1.84038605572443541.6872845610809911.811791026658677Constant1.82291640239334353.31903673534780271.7910161340982981.11077925036676481.27962577880774342.6715218360223991.40330473183279851.078337975086483Constant3.35682255160504451.13500277794020451.76045242758736591.0190281951696481.53065376275668321.46189089669839722.1482185347564933.2311518032348382.06875192469716751.68422780496366432.87210415170256233.5189517430532852.110313326941141.18610303844120321.49855459275852691.33269844470983711.41490532703039951.561848458957911.91040822060972612.65116530206049061.17539354169384882.30963265878382271.1475789110786372.1068575135344747Constant1.7756493200721444
3.29135542522959133.38414955084024662.52353069450102431.07305188218590321.63149781638458011.40864888428031473.351375986579597Constant1.04072664640310291.40508443659990711.79673723186228631.5020873583480427Constant1.61712095288250971.18586064282520941.3109697417358041.01754229459298061.6515353713536611.60238793487142141.7948339216485711.51644694393102534.7327343324258891.49715993342418961.27364922216826271.5805766359002061.7403486084997891.66017151653974151.45998809927944361.15228968335259641.19137729623905671.22866181679654531.1204511140000821.39180403139336221.276418932678233.23479147715957231.07586384818616021.08368410785698771.3615746955899644Constant
2.54170458737549382.67563796012899151.97477895982237861.64056509517984852.63607548695014731.85746770831126922.59947793616659Constant1.1604395951581431.87050960074268651.82772746416967881.4394503714402431Constant1.97010776424642441.8228840244475431.9115671454621561.11441912637141431.61685131043619372.14860827811181571.50683873467521261.57279972945772142.3941594028798181.8429163738531921.5812508639357841.20585091368383561.81735301545998731.12085423358125151.9962236208040531.15107851068750371.42698426893554191.31582630683624261.95584285056926582.09200977705612261.1620674263439783.3942681435246321.10775172700868321.09798907006662861.259793793161874Constant
3.0097932292840273.39922976805123161.7427184159424441.133334674275611.63949460602562771.35883109097238462.96813978551312241.0328658264967751.07194560180532131.58047789545477961.44397277521752021.266690146955243Constant1.49920107721645921.12913759037056781.30394383871218071.01632964606901081.34637308095591.51688759140126561.18559389917512711.47796561504013122.0915372548728621.33507438636371531.07885755594154541.11881227389931671.68808432174561781.02698627796933861.6849410198362061.11560410270108281.27541786552083151.3853318636201421.38237166099329541.1545685149232781.0261659077352572.86895562156873571.03285736615215781.0332563119703038Constant1.4473894908953906
2.7190861630697943.43539855859400632.269186558257791Constant2.8624600156808292.0470437933470773.0963994266978627Constant1.21814178396747971.69123877590580761.97224573817656861.8543227234934385Constant2.3527705673414211.72214072078153931.68028215351818441.01987135690653161.61138933247317781.59425668655268282.5031162023452841.85116658732584942.28602175888953371.65401589973297321.36744774963972772.09861326413779461.75415544021802621.13025912530503161.6463703630031291.30092068687841311.30641777580730941.34719018554953054.0688088560779154.1314279691131661.21848459048976683.8688788085185091.32480412131541781.17816823368373452.089589112265979Constant
2.138023030128032.40805233034348641.6531324935717261.0571635809977081.9655776093090441.6685372632215822.0994026374685566Constant1.21023834685787571.36485066276147961.22647490523041561.233552891544809Constant1.68487945546271421.31888866337062231.5745707339784345Constant1.5377775862937831.35523399682703441.16995843374891951.76930779000214232.2943725162496471.17811590162310731.10486394503622921.31769800793560152.07807813186067621.08568515457040471.43755840375021781.2190244436889761.3942576570271712.72885544494936072.09232657549770141.69512740227766921.44147069747640823.0369706923484312.5594195721410253.0437115321356307Constant2.9423612311311556
1.8332158649797241.87528449565709091.26298604207413031.01629933760544121.6356979278545411.51235486163932141.6723531440886221.09868901360304231.3531637816570921.1981000563144891.2056416765144811.31531648076047361.10647033533322481.63901672131651631.10033116383921681.48245796327306941.01137166089789041.27737784027612841.1976125883848371.20826934319428551.79104679838899241.762270827240021.21371607810566531.16916935999295851.21444206175146931.60695450810252271.05035979495447361.20552669021244261.24513041546672981.36406171568575931.51225667835358851.12257658796575431.16245255999114551.07556555129927972.3316065089678281.07614876087267631.136930213626459Constant1.407018069859243
1.75175888784467371.55703778298878541.93658506199762241.06372649835558852.53446872157722.14707809875700132.0434281793310391.17382555602546931.46370278786621341.66282205283200861.44333136345849121.4407974005666061.619830695502341.91816476625108231.46557487920448251.9857896911166573Constant1.6323978176733711.61188313582046531.73456647343974572.017133396652461.88665471538598341.84086554437451.30494986620850081.6935316228330771.9031409883434405Constant1.62347273012322011.20948638214832151.4779760003270671.6170162094814091.57601711241772021.8051278094074231.22258884099489061.32282860754492981.11295845029785911.3373751658583761Constant1.43653988817954
2.58140198666476372.02829746546079732.4206944387988811.01475690881531942.4066201348724281.40254193970085233.44414084879763751.00959320086011031.78875667411635761.30155078771391591.18122893122087951.59312221431448541.0267936920536071.70932029083223761.04257512878166471.5828277056861761.01006070256935271.37500747753592331.40071440642714481.43662563628883231.69247201218697053.7834536690819391.14223379353619731.07948383058013751.16854411954187221.49140168650716891.0320119967651661.74342607755640741.0803164709321611.22860499304834111.52068329871103611.46504463255982251.5803267418544881.07532802427374152.36654845190818851.07336703212952741.257257879995633Constant1.8725601689713436
2.3427606318107082.4688048087381871.62172624898746351.0987639344743111.5912500686798211.65176113773726832.934987677647984Constant1.2369131629315891.62582615080058531.57800138268871631.7600929556944764Constant2.42541724376756341.43305945840785641.41332522143936881.02092804909522971.63062559361127631.45435872292041911.34974567163266612.257512116137471.91484779060411531.32701978334613881.2300239285034141.33857979504637581.934426480598941Constant1.57927996710508861.24804908131316641.25919875702259291.38363553929047932.3863614529026161.74080736569517441.17563489547861842.517632904172908Constant2.76135256714301521.4412245703724942Constant
3.6671166337658293.16944531983602843.4702046237013784Constant3.16663485151624973.7135774171579723.9839766436925146Constant1.6113242301459323.12912742091933271.6406900422186973.8600910976040304ConstantConstantConstant2.16001952563827Constant1.88174430775738032.6248526579958744Constant2.52794188992269353.40178269002004142.21023365499554551.99645931910835022.9794145928651691.56917919956768431.69878278992874492.03190699968285852.346747772597648ConstantConstant2.9561396114478331.53009913847663711.60302870048697453.9209524422272706ConstantConstant1.25067917550490072.979714492595595
3.0114699425027743.27919851203896772.0109189656018741.06136546883127331.76619296628443561.70090885328087182.89185276286977Constant1.27599354710203321.60803632634882071.9135770537120331.197885658172786Constant2.6597848192067541.60491212017911211.77598127277962361.03194228650016171.47187481530746571.37833370157492091.51066193475383752.68906965924249872.64884247719760341.60295833190639311.21915655397310291.56035020038237531.88797364188888331.440276384799871.69802175867093161.76321592092790351.20646424435476751.25571736608064132.7503292523679581.22887426904316241.18864511529472133.17320570278310271.00768681166048341.0723961770672454Constant2.1905932051726134
2.80736697172308473.2520561127833462.305409592820768Constant3.1950470577560872.3193334375314313.282461556611952Constant1.45287872083436681.68424241407117982.1846475062820651.6478735478677091Constant2.007903188098261.67596801560206472.370341699024242Constant1.69495828158186111.60100392028010762.23752180974228042.29159547496775853.6718270762334841.5390317980063461.11786212798347041.72861451656435831.83244504052630441.42888104229327142.0022000892425661.49683725100714821.28669171081024232.9755443397874712.74636917293744183.01087597497185031.21650877884432853.1319385061984431.1660991138600751Constant1.30631049392428332.934345133069164
2.43495317525993072.6633701507738453.57107330140209681.06422849576364562.9634469095039322.212802171037218Constant1.0372723129021721.65820281903885421.41292796786150573.04578957491357241.7598207529588201Constant2.05396030180225741.1895632660117512.75482873913581461.01598340797382241.71006174205702652.26809297718825451.61145240299532462.08366579493438533.4394905926834341.43007896612142951.32184882853950141.65404736442999531.46487624824308861.18843372934791972.0778469836106461.26166970250373761.08457093345113291.13425894872048061.82178288910968721.55198901131419161.11134179305659872.3581182865173131.02505588873566821.02403717629004441.2313030488412489Constant
3.1187352413061283.11282823183571632.2255362750393041.38952122131928961.81150463947987281.58873245916548992.6996602464736354Constant1.05377081236293321.33930570270187131.38300850725690071.385757675369037Constant1.6854037944183511.34489798405717641.6275970166112571.01108530475841191.33635896262028871.4959304018645771.70803091380585311.45755687410053672.87401578976816861.2951438892136991.1643599981681381.31308590620654251.61184448121674961.0552926483819481.86437803706583941.12454495289270431.20660860864477851.7768452560369271.60510966689185871.22560925711203141.0340704240764823.136601120723133ConstantConstant1.06513246590059121.773073765791025
2.8140364199798622.90767969368888141.67941161414025491.18698768068071341.46311847641428081.53571204883554142.65612873417978921.06370032661185541.10376370239881631.08170060191348541.25098397209436851.3747325725154982Constant2.12225629742373731.33789687519474351.4705568265663311.02143632764287421.3136914610607651.67980814337554521.62798939056966651.72981845203497241.80496754249236281.16626017669392471.28957761904872451.8040158164396971.69301121058266651.11621748402428311.60889043033065621.27239714629013381.19352756221902532.59898751403233152.9110740638214421.65563787108356421.18391377761638482.77021925802445821.2392951932923142Constant4.0331425705004282.012116564568644
3.3217532529343453.04991099166274632.34634233985014131.03230031046799461.5175560126647121.43562389884114033.62159850661138Constant2.46392668732298551.1193322561204581.0932703136695661.43286029369924721.19958852449300111.85939782900538341.12504181308195082.11032264347708761.06772665682117341.43736235792944431.96449902934774581.26978116520258921.69452055396537344.6534714690499961.99453417288091341.27307293514642851.48123837673072821.55599340557229841.23871045071338261.49001165160646121.23622985168202561.23597320238107481.32411914564633483.08807060813736943.76741596401016571.39280988649354283.66687785716154171.07423405514622771.14802868916461281.8467484138434662Constant
2.4581245588763762.0618670442287441.579442603518254Constant2.1335547317792562.03004276538696932.9415484841723885Constant1.51127902522752971.67357530943316651.79385311619165531.26335477858063781.05227650036472051.94417932163889781.32068580151130721.56263579821159551.01543876280819911.57745963053043071.76292493336802371.2508237146644052.2780227074695343.0468234621046941.57282757303243481.40369898670366531.46384347395185292.04135733584734251.12089154849309881.69408742795360451.2387777540129181.10347301120108751.3136472956415081.43456072088979661.24702375620257831.14231955650110932.6600102669837721.03216313162180561.3251463271471071.0202680198638925Constant
3.63320536124444532.5883703486397712.9817999049925246Constant4.25091337981429351.34910097401178563.549254458138823Constant3.1769064673240051.1380935694870821.42744389675957083.295067941577581Constant1.9930819741236893.9398114933431141.49707897903534031.06643395538507571.7254067403523323.1142742056583961.2646305148437953.08060366220986563.52357680256630971.71620522342412741.90373839335159941.18337967538490422.3944195672888013Constant1.97462874302575961.14671137869292061.40896023673828631.35142110365279481.42562181741134041.28220208289422891.29863672343544414.17113493684354Constant1.2024750979961221.264657769446557Constant
3.07779994905373.28470252396310032.29631241993855231.04738905652623981.87838287741063771.4947062472319342.9155762457576681.01149093097428631.62278976909254641.49226359189575341.20650040674134561.5267895129866027Constant1.7171977811091961.10167900361852871.6490827248375151.01404562594020781.50333685098978581.56242677519287071.42856556423346051.56822840350881052.21103797048469941.40643108540646521.1818732968392581.47439402366456341.81461996148711441.04094211394225591.867079153929251.08394092238592951.26352389815881281.7672736602471361.252145007537251.25557584645359221.10526071191946353.25246792232305551.1077830388760811.4809872449037635Constant2.0363497282015675
3.29805463631826433.53732300200005632.15713189371758271.354169055357615Constant2.09692984729664474.0852719573237374.51982533817521.5456713674507131.78287810109441352.33764635654893432.130883498822102Constant2.97270695493891071.20886035295714672.1774253580662321.0513588644695231.70087950828313362.3759769418444042.89974861425932272.59213519233172374.62855607390976952.20212852530411761.71837244933398051.86572386814469552.0390739454166491.62143801607538031.67666498710366141.45390844198411731.30073770279182831.99419908069400351.4345476675554111.76134122046924621.56514731453135973.25491608978767031.2440117618490425Constant1.14818145389219572.4791129899534647
3.14206101981674343.9195923755593231.54707974559019351.08613140948528522.324484773112812.83988486516769532.6398858253938426Constant1.03337853860893542.10552077261916851.98352723754272782.4576806063518286Constant4.52834558551945051.75985176878594921.50364956954545011.05145436724508622.9437818970050551.98796601316640281.69281551480065324.9451614326544542.32299264086365963.0282340224605791.91623426375391232.5761612673017963.9342246738561655Constant2.2219556457984772.71092898739326672.9485805403389083.66515925085547472.24986426828274771.9942827737711921.4133140497326384.372654626646912Constant1.299492083429529Constant2.5346757526365606
2.0293495657591721.85295245894719421.4636322419596828Constant2.9567746099762112.55659535549625932.0682228651630691.06376642413804911.12633410551047471.66775217578513461.42266823568778021.2601780924481981.11494675814997721.70174208189819791.28681787887495512.54079994047602981.0414150896031072.236835273740961.66832813232563981.5842850365804891.99473281578531681.97769778775899161.76304348014726561.70226898035690671.21369958364760172.2958839702412451.23898153216709451.63356565587769031.26950155908501651.436973150250411.47411707330665291.33899975433664051.12757563365496831.20296535734792621.41107908356027Constant1.1796801272965862Constant1.2177988288491952
3.28627510944126264.3410751512449892.7092668058618681.73070836463356012.70877880858637441.6726134659201777ConstantConstant1.12673718028728961.62090603056193382.2862786646784121.3525235362238588Constant1.87350962594612861.11338797484826072.34121709645269241.01466405659110671.58665748549369521.56276236474906721.51712781840345981.86971823345750732.26198396006730061.23063898132599041.45357826152132491.31049482079651751.83925515216835421.01834674224484871.79531718623535941.08147572229362151.2351387433187212.7534027353911221.21815499571286031.13024191916167861.0274692149345453.65576206386685072.48106179018160462.58154061867233951.2928691014661302Constant
1.84061592511069281.70939485173913241.66656279557075761.16681836685572222.05941139997202071.4229350771164932.2774271425361611.11105433365179061.50943410279658561.4703325858420921.44006920018301551.183066612661505Constant2.3232410301065061.36058321526361411.66108959376791471.01300619656444061.37877325565666051.27657033705540981.31442631254884582.03563487668511642.04149536835587941.2566336432517591.58731505457543931.82190497905900871.68577291352150721.09236421967465971.6910464778066091.34741038433961281.58869476635942181.66621478911819661.15212150720112421.36661558775384221.0637452619894881.14750179594024271.26127369919206081.1682542515782564Constant1.1655347045957176
2.5659632519899322.1681576843048162.9992629180241064Constant3.68409416084402832.1635391756716262ConstantConstantConstant1.40817226764650722.3957258830690558ConstantConstant3.417105764255195Constant3.82546881383847Constant1.6353679130251118Constant2.199436597687947Constant2.96767659574078563.9169370884518573.279395284570849Constant4.8760071661860023.14267659650717552.4292769660907954.5786336388087125Constant3.27726873599356154.5932274154767522.1444066424703042.182249623393147ConstantConstant1.7064489694797014.9404772542235124.4912820913486895
2.92064149618398932.81312241821725761.56732174702120621.08768244627044931.8012396111761221.495789146567541.9571310005508935Constant1.2113702906445811.4315278980353651.22993300557297741.2393629835183408Constant1.7521854621461851.6293819615836991.46110191915025031.0518731895913911.47540766011331751.44874002988139221.21025595496957041.9006499463669221.77223593677282891.20190460729349931.34399778635197051.21372807197085831.97696288673634651.0806082705320331.50607935921859881.1140104145896261.18798160173051651.9505474560730491.2140920524324861.22538551181498631.14548818452291683.090298218829626Constant2.2319542559811811.24897394068551763.1528095700301546
2.6730640002391673.12753359285636062.3813715838362497Constant1.56285092739373541.82057296522283292.9620892631751325Constant1.4036681548288161.8183746653518361.60708980699673031.23434794139909121.05296043672210822.1744988401871371.41899292691902052.065470710258315Constant1.6497924560431511.4158135372349592.16100087778484042.634566524538722.2767848927204021.79730659181919371.86689492744659982.0292534847054691.73694188335451671.44704997187485131.9040851397191691.31566586130595581.20247260174321282.63418378260754651.59896838076436421.5443268451556051.251790711122953.5566820519570331.4599036766741993Constant1.593401075582062.953223945270775
3.31581348141228953.6398240137087912.2934974500425054Constant3.369960925053089Constant3.163046246571589Constant2.27616626227710662.1869909535694612.32730350575721362.0832372217167805Constant3.35240472001088243.83330975852834532.21014021287131.02177070655957312.12453387385712842.49846578410030063.61132767634142263.06154803588482863.0883343109209112.10237310321581242.7419778405829151.74891739056392532.00279870291974362.123119262566792.31652101127671941.09778080511008322.00295002244930042.11041160388231441.47603741993965021.93131329084969021.3490337453652473.4775434532461924ConstantConstantConstant1.5094783563706697
3.4067798223669411.7684746188090782.09886386714126741.10082433906666632.01024669441703361.63183380858970843.91211720731624931.88294318809203492.64615589114130371.41359231752970341.34579436209700051.6131527780821764Constant2.15221936797951451.37002275159489842.6330437839394641.03965069747491873.7968610882548621.62889425918294941.11865361691117053.42230628963502742.7913167308175331.56754598472610151.29529468881137791.23014424916388994.1377661475518411.14233048715817481.75654778378906951.548446682269692Constant1.21342221311097421.29298568555543511.15121692405820291.94680794722351072.97527194236641361.23908903739779591.18556962960921291.4252938005684934Constant
3.40560697270614383.2110079067852033.052096716617158ConstantConstant3.35168347672169684.47849842581436ConstantConstant2.4892322076376865ConstantConstantConstantConstant1.8912092858186962.038944617882241Constant2.8847310180713493.20597875332058953.60105292083203082.95737615648747053.410334863474279ConstantConstantConstant2.16694031040601682.165528321992272ConstantConstant1.54379212033567881.87158707285302642.03955698476716132.7908129716751352.041928037284824.798375412569188ConstantConstantConstant2.2195779527088795
2.2053395285477712.2613841085299762.1288850467747205Constant2.5802109010921832.1690294059560662.9363760006789823Constant1.26665817292030551.91166617161756712.28288831659003531.47640044084982611.09820781395847211.92616730107576922.06549267708837862.3384213124840791.0410426926580342.13342702015949872.04640334146125721.67005642362598471.78383732340893353.98009967242681561.57513304491013041.37128027332697181.221256691355133.8521606860679671.20066005008426681.9044765709791981.2163552309356116ConstantConstant1.4304910041067391.48202246299174981.32662344136076052.9534525432640772ConstantConstant1.54739038938217173.1820122846808623
2.3041674571076572.3436377329064121.89724044425989821.2537121152056471.62467618405165861.64221706682159452.570368931336106Constant1.21589208216089161.2537397096119371.28477809632146031.2789130432717366Constant1.9488283540818361.26622247098518151.5699604837152357Constant1.40580907464772991.46231699101510881.4074097119627222.23250709783450272.08843572858407621.26661528969065971.1067268255506961.32638375457802461.69442071069472491.37983617897455771.66248819637376451.29893764646730371.19886542216813341.2722048071993591.45791827834091.5252907883583231.27291139624653772.6956991888960291.18787099083828671.08312270183183971.3446812019517511Constant
1.8596688207281892.0302248524147761.2558090482471402Constant1.90806712752667741.42612731343881951.86757197006334751.20379820605948761.28861010108880781.13344366556656121.72692132520632961.06746777659197471.01555284130421832.1473907781112961.14921295902377651.73967644763723151.0515428309372021.57101267600189081.47240291646164721.13220226245816341.79792531180348551.83976630890744591.2826059057648611.09056248542660121.10125496509954471.6929308828325351.15736969222836631.41808921956248351.2963555303217081.0442097613998367Constant1.35288046352193231.28234456462775271.3474723052664142.4036832315660792.0631318092424462.22439781688988921.1554212187864659Constant
2.47257749817254122.5714686688500231.3636650381626614Constant2.33776174211566621.6957841174525682.7376841012804634Constant1.13992716800881681.59662194190060451.41463198535936471.26572928583663871.03051954901507631.84029745083404731.43647986668598551.56823586305941891.0179706734069941.6863674294117151.93584010215017991.63375039303980651.74037234622256622.65277365837260961.49641913379116341.26383161567282981.2229675496368582.0711979629160171.2229257119295761.6462591715905281.17953287320105331.62636861270356061.9439336387239081.20148855039416881.20859157458806151.27487339601832453.00282927800761451.04755392229606641.0476626932787194Constant2.0074454011852674
2.6885710096056462.678644824410681.7833544469415787Constant3.14069037738108351.5524957504380892.759475258672295Constant1.20612330747511391.9932114720878852.81128115098841121.6024560048730094Constant2.77835594085172041.43367137210803721.4614404452167495Constant2.3093677293252033.08012756047005131.84875630928920032.5680032912546533.2961705154202082.38311336715695181.42231013766003981.69187270374500943.1454856583350765Constant1.6077581694929261.49570382524631351.21253189263281811.31442574136459031.51272644839021282.74014752591908151.20380290075868863.129229573737611Constant2.38819882535004171.1153637804373155Constant
3.23478687231802862.7517812013859571.50566316423888471.17395508306407951.52666161115004931.39597081422802071.7043144179614627Constant1.27925586264073181.28009607316501881.0844471261486261.3091886250274731Constant1.75343194433725811.67741790152297181.55686244740685331.02027589724164441.5044663833677371.60445820385604381.34349357798027351.71439526925171661.4374624317482811.26104375150041381.1740473678402221.3394328273904432.01048662708655671.08559787837119531.27837814508406881.22316674052230771.2871544157420461.9998747062277841.87528394866147762.60186659669691521.15062440209597083.4748610144788532.9047895615336121.764473977625322Constant3.740245667546478
2.83988820636557642.5832579114161341.72156169752578011.05471296264761571.81508939043269062.0451672686611862.04657317934852051.08984279861318361.48923672963582891.22476389321618422.14449171690721751.6445098253438462Constant1.83343516514837071.32495751127297611.7610177373984168Constant1.65212534792288351.70830399835373561.5825708492587751.96568897953372222.02975088222399741.50137151291327391.18828692687848571.69408408371445222.04617510319323871.32464058494640761.45568484824183451.3354342851114931.18909741780763832.58758365068534251.17372470905962881.51196356635525691.2794868250920913.3790874141382075ConstantConstant1.3074956872481187Constant
3.0846816598051583.3194936351533512.2962501168947271.09780893605133461.96275507595200031.96364233846180473.36321381320774Constant1.1050666166112471.57070691127077371.85987107230404261.3832471051338848Constant2.0634539267718022.15360701120521241.8767669964131351.02985833660922841.56538572399160781.53456313071210421.24171867385533631.84678062847660972.829272296195461.53706025546388261.3499592042263651.69439462938693122.05970893878250471.2666737130116511.74838297044047541.23903518864164351.2343834939562671.27981580225530541.49626725620122672.31377675993675961.19089961851739664.38573073416691451.4827611079729691.6098587392858041.232465310002189Constant
2.77301894594954622.381403357048412.58256288884344Constant1.9616420456534334Constant2.4027107560043848Constant1.4201273081494724.6246895240396672.0625232652364672.2897191903660605Constant3.256877343846169Constant2.14016312563219421.0876891206476471.81531106855626281.83456612921675233.78997246738769042.86198060235889472.49469471255998251.87712437382123241.3416402767105581.50648923864042122.8239716881498251.25198604917113432.19189624536235251.14874719386649421.23899818750707411.38575189858534681.91446431050949781.76530061365595531.20467255036934273.06351803461555951.57707495210271481.60123173445151031.7507207209376792.500813965760153
2.93918805041469743.06069874560935151.8392696140864251.20979697345454621.74802660845042231.4827215858959042.7220879319908264Constant1.10406187400016441.305137114098651.57384757613863211.2874665863522491.0275536048134521.79757565983439551.4344511369016391.28417248375496531.0301289288975831.4140170377534521.4711240855044931.3863643547011171.75073924671246832.02787455537954081.36292949594027311.19621773906469661.36600893968177941.61570656232142731.0208220579337031.53293028648246481.15016539626541531.2448060066540881.25777069825638321.39861028140617761.1272053889669031.053033456078782.93138219338086261.0225054103940281.07345713241597671.557160393133077Constant
4.0612714961835553.4036824801289911.8566563183029403Constant4.4317166055028523.896361095184399Constant1.69659024359152772.81229443300915974.2066222006695992.523909468317666ConstantConstant2.54297234176083452.2113048589960222.7245997899313856Constant1.5498194929799984.5327946894556451.65934858073623982.1929693480234982.52897638802441631.2903674784162241.51312998148750792.5296060307269871.90387576550841331.86395220971341742.41852863487386351.96579280637794681.45737746763690961.4544636153700241.63433427060793671.4186765443100872.3753887951545154.304376147897532Constant1.40812188427037091.1783189918695287Constant
3.0159118843348452.756808424376078ConstantConstant4.803412098027816Constant2.0907388120442936ConstantConstant2.38577836832786441.53821110999159634.61373501369857Constant3.1323351454948165Constant3.2180933445850766Constant1.74841047953480522.93829889330402861.954031442934352Constant2.2455749711648453.1527122582214894.025604513082525Constant2.35598278645373731.93729242129287042.1101000817413564.11892007698807651.368535923582242.5980988063201753.35798763581658883.24920293901795182.8105635690413983.289893227301928ConstantConstant1.322304577701588Constant
2.75945469088807152.65660126640381662.5420528156886237Constant2.95961207979075262.85533486939503163.518400399729463ConstantConstant1.47154701529774412.6506983939588591.9794446009648397Constant2.1549012649418081.79471805822810351.8018307571746242Constant1.86472813140521422.05800566816133972.05401884112974382.51687918600098472.45198899517926932.1079965724113371.9690753356905582.4917129617175052.0474977118968547Constant2.0781960016218271.34234362897202631.43595153196840894.1523771994946791.98839552488329721.78581110505275591.710350800770336ConstantConstantConstant2.0466091070007253.1245024545431423
3.21571457378571473.94792894730325461.73258871099460991.0493393477776812.46516684669419561.26131017209110422.7965456101552904Constant1.80981676226139171.51150398905816051.09682360740616211.6190292520857097Constant1.6530117697673961.99539635372773262.10145062559283251.04103545757204661.52275959251584721.63702847835478371.60148010024240641.72763485257857632.02123667060260861.70674875643969861.35736747155388021.32973580401930151.64326075374296131.40565297412491131.43897640201774381.13271330692868371.1623674601373011.36872004755830281.47614588316471141.41949652591406441.0951265791582383.133653532532099Constant1.29153343188702151.10759878429840941.5245002614788172
2.96455342400182251.93984889821893611.8421999491786176Constant4.1411598003876613.2488343656917023.5604175189873746Constant1.34632667452788881.97095938802054342.0495568413869892.691483748920492Constant3.32804643782450343.1472662172949014.256129035705584Constant1.93842510812301442.4731584847005261.88245050778679753.69507741568687062.8176628049413532.26794793533869751.54897538661069861.34827449573583082.0605103946914998Constant1.70426632261755051.36596026005733331.47408144728793441.5520969246561922.47984540910664952.9151381635829131.31832638953658423.10930623396544271.2586718030205021.38676674308915971.1888776754847263Constant
2.725094382495392Constant1.9943081236615328Constant2.81652241819658931.6465317611126435ConstantConstant1.43249843624787391.75712986271885232.49927785749314161.9114570145226881Constant2.0068877011682721.6580911981430761.86427752219767931.0890215951066761.66140937106023021.87640965356345851.93611045200228121.9974241128063122.6679114633467642.1735800729955643.56684161821494071.45907424193083142.51065135124864371.69522595983971242.1547681572196831.38420492739808321.498412555733292Constant2.17002836205314552.11758393096501771.5770831500214042.642961146233691Constant2.6923263046598582.0192514514403052.855917192505247
2.93247800050850942.62439871903629254.589488880763931Constant4.16600938943124Constant4.1885347799956705Constant2.00207358488620242.446649891427276ConstantConstantConstantConstant2.2285390658082554.3363698569889961.18689234201388843.9206570844319153.3001869047919006Constant3.15929904055341564.1185071329183621.8262917237089081.65584201985673231.16326666527283273.3804717612948642.9469379495308122.21604433844638531.33430618055154242.36013009330347322.5429382851007921.51004200251610483.2975270207416961.3702371579230355ConstantConstantConstant2.22660148079060342.3617685276940708
3.03298723384281883.19189240601029621.9688984561264151.3155907841596391.98920944239467581.61125808763114152.9182214507396051.0880013193868671.2684884743964231.4311807186152132.0070473519576551.366783448786469Constant1.85377832801861021.19987797754957781.47028074848432831.02176876657546131.45451460863428821.56651694987320861.49874935324115041.86767598259552162.93123229882752861.39715813541516391.17272861115607861.2850336444779841.72318988612025Constant1.57545146436872921.10930316553572681.26458532172774671.3682666497094661.42152404197116391.18597631996357131.10738809145484023.10220598175769171.0825451105085161.2080904624640953Constant1.5518116474481285
3.36101271166546053.42744433057888862.60511032295122871.15415420175618122.40426106520134251.9010735708300024.43627437895259Constant1.85444598768600041.75085321328564382.51405761683603221.4924909982466736Constant1.80873389767970742.0189924203430431.71515345696625321.01705563658522521.9436054168055641.95318557182467111.79118024617270471.84156170885807963.47875867016072.0722937168922712.103792764667281.54982746604468111.9443938963715721.21718284687996011.92066229869409071.28196938018969141.1852286619713246Constant1.2751231635279031.27173384431735051.2666295697229657ConstantConstant1.31213344310794681.09734941684415952.2668958612208416
2.5071830186682973.42137598382093662.93737856679461Constant3.46568175879615131.7997691166990712.798890354543895Constant1.98047252050006061.66325801447736851.8673322534202792.0423251158716242Constant4.963605915437635Constant3.8192866934888503Constant2.75208688734407362.3591618268464172.0798946511759453.82584459143614453.05678421476069942.4888231161317893.8953134602975132.253828195054162.53114868014257241.74331062538909952.55204891068372941.7777255224800208ConstantConstant2.12300920339595842.10008171654755272.13243313986512063.5335665355456123ConstantConstant2.54502487314989522.7997102083005885
1.66069092949213951.82168142707922483.220632146423593Constant2.8291919910518084Constant1.5747052362313827ConstantConstant1.4244225585753398ConstantConstantConstant4.4974920973890361.6073652469407092.2989690259448663Constant2.3695667045014312.0393068796427944Constant1.51978952996237433.04026230092777672.1434634026661514.248800537367829Constant2.9225750799165513Constant2.20974545350063021.83368123443070941.47242499818119081.797427279154811.57297748013889452.11896709425830341.39397949356146361.9957491657283912ConstantConstantConstant1.8356537053097803
2.3807509056107552.11159744256974952.25698155700292171.17486532773748253.42877004888494331.7943427825120053.57151427195151161.0947422865119521.9772793734765251.33782865045972051.2266917327527741.6956908322339006Constant1.87611386528051381.20411708123081661.46606473940562651.0103397842823391.41329557969541741.5772959059080911.7761307829099792.37152250709028232.99622421976828251.33349611812375881.21607876156324162.050078242665572.0391443573404791.09462608085198171.87003713837214771.22614927323765151.42933489293478091.67788902885585171.21790642480310621.84573411973062921.08350313367205532.6044862278898741.93068993852466591.4461834273198484Constant1.5486375029156014
2.4181562610508572.26834428799098031.56307217174856231.19922855549648081.95273491191442371.80689388700884052.2248858096602038Constant1.48178381040455441.49300549179366661.98084933779132521.28604786488219911.13650009803945662.2034682028267981.59206498926416382.1564529819193091.033201986108021.91811106078765331.72349720583674171.31431225654778651.50969437239861713.09012777147187161.4615547405090161.13103379101591141.327413400244832.07471279109310741.45126416743884711.8402594103297721.37209843348500951.23568169090314321.17908362987898441.76812273665187011.62641165974656141.20624003552822882.9862362560832231.06191233234124831.18741908518039521.1302101889646956Constant
3.9816298620680363.70483399217258352.5109642425485577Constant2.1368820015133211.79393933940349932.5291917784725175Constant1.2544708502530641.6522004085228431.53067919166027671.5095578482943433Constant2.258068033812823Constant1.4275283571145005Constant1.4445539720279221.4161735362129091.2801003729310342.36918338845202172.44961571646086851.52306479758403171.36328768153185751.42524050899812731.8874206903217916Constant1.94079163274597871.4579562805504287Constant4.7281203520524991.21744598592312481.30539528060964071.28668679149567884.299811650100719ConstantConstantConstant4.371313468027885
3.2562665303686052.5990973797700341.68477279014104651.12435188017181751.6804283427531191.56256516336373432.771766186748579Constant1.73521767534536191.7551894676142061.37774065105420921.2802490808410005Constant1.8887772911103091.2759346920274411.7115420851231078Constant1.5989883258795551.6708753572728721.45639194864148711.7958889137445852.99200295315645761.4552923766039242.00536220126088961.34914915423084382.20337181949124441.08814122867604211.71989722885593151.19668378115167261.20038127264746792.28566320833660352.2501191415694522.185719382285991.55768614763935353.3285087169510661.0926849362850373Constant1.088017406455896Constant
3.64986789170593843.2525067177119952.9795777754663661.49813985395470482.15367595874989842.1175714093438142.391962575753067Constant1.40812259996060752.4439049940496073.35775255821748431.3497336243429277Constant3.0482680040943513Constant2.4672050646332817Constant2.4369910902825531.89485777095594642.66774797398445833.7171001728653971.77414692594098682.3409169746813891.69624710898585331.8462511867631471.68520213524736851.43302422426955281.77672001670985111.3432706213524311.22190586699148841.63417317411357743.4099981649419218Constant1.2332927658744097ConstantConstant4.6463059619429791.90202300622701513.085498169370939
2.6029150771623562.60611194966436571.5194220355198831.18601432022233572.12773037885881241.99206276998270633.1452478565970911.71686481258605951.23483639567961911.73839206000581342.00201041116999431.25210589394823681.05757963403422452.1419723741727821.10976580159164721.88985255644172461.0211283218429561.51259339428227021.7565493836578131.61117911414807622.32131861002953243.4905034658060341.42019718229284231.36356501541752231.38260051767575361.82299782483711041.74837011404027191.51758074628539121.5088112410781951.16949877143651972.02988661038256432.06722412846615772.2267088300769341.38172835812575162.80930584537053731.209774605870063.10771131970845851.1041708386874114Constant
3.8059060101451034.8093824327593912.677738545019494Constant2.48656033043007741.54814907011462543.1817364282832155Constant1.08452595330838891.52454685181279671.6892708436112331.6050608477366535Constant2.02039313560325071.3663079124365271.8423830337532191.02441766465244431.5071779025078291.96268743328623361.25303210743258392.2209674876387172.46379267185706171.53242346405808981.48930447637611651.43815844073694191.862351447184979Constant1.84450884607832541.32139641392891471.1750126685871516Constant1.87584034063570871.72038790982283542.86560056527527834.3398356249985981.0477816603046075Constant1.28154657090677663.513480756828371
Note: 'Constant' indicates that the feature was constant within that cluster.
In [ ]:
import pandas as pd

# Assuming results_pd is the DataFrame containing the results

# Create a list to store each cluster's data
combined_results_list = []

# Iterate over each cluster
for cluster in results_pd['cluster'].unique():
    # Filter the results for the current cluster
    cluster_results = results_pd[results_pd['cluster'] == cluster]
    
    # Create a dictionary to hold the results for the current cluster
    cluster_data = {
        'cluster': cluster,
        'intercept': cluster_results['intercept'].iloc[0],
        'mse': cluster_results['mse'].iloc[0],
        'r2': cluster_results['r2'].iloc[0],
        'adjusted_r2': cluster_results['adjusted_r2'].iloc[0],
        'row_count': cluster_results['row_count'].iloc[0]

    }
    
    # Add the coefficients to the dictionary
    for _, row in cluster_results.iterrows():
        cluster_data[row['feature']] = row['coefficient']
    
    # Append the dictionary to the list
    combined_results_list.append(cluster_data)

# Convert the list of dictionaries to a DataFrame
combined_results = pd.DataFrame(combined_results_list)

# Set the cluster column as the index
combined_results.set_index('cluster', inplace=True)

# Display the combined results
print("Combined Results Table:")
display(combined_results)

# Optional: Save to CSV
combined_results.to_csv('combined_results_by_cluster.csv')
Combined Results Table:
interceptmser2adjusted_r2row_countlatitudelongitudehas_Tennishas_Parkinghas_Alarmhas_TVhas_Clubhousehas_Playgroundhas_Refrigeratorhas_Cable_or_Satellitehas_Unknownhas_Gatedhas_Poolhas_Wood_Floorshas_Internet_Accesshas_Viewhas_Elevatorhas_Hot_Tubhas_Gymhas_Storagehas_Doormanhas_Dishwasherhas_Washer_Dryerhas_Patio/Deckhas_Garbage_Disposalhas_Luxuryhas_AChas_Fireplacehas_photo_nohas_photo_yespets_allowed_Yesbathroomsbedroomsscaled_square_feetweek_1week_2week_4week_3has_Golf
-127855.001688777411849043.83347003460.61051007130301060.60720400859096914397-247.74335445340458-1149.969491792079726.878714865526863-17.92828290721685-439.26816714293295280.4312786674494-56.86077813483782-128.5863453208082549.902412869435736-111.9267521711532826.719641482832643-264.3020048784857248.56365462463296-142.5783903555907242.81779873197542222.48960483783654351.23578532186315-69.6320479610511621.028659498150024-34.589728382701541778.7385313485897-168.9514830872205717.489561350725868-7.775955952677445-296.48806302084824636.488718149034-168.87766162797365-17.11067666206909663.012089056794366-45.78148282637764298.00692137432816776.5531355747135-271.61101464536426986.0876347900331-131.93238834623406-708.9194586045452-371.16525290589647nullnull
-5068.98198093209462606.55452760490.60745766523449650.5876100190946676749-290.0522563768183-139.5256890963922343.4760876728530431.0521352683546127.61375087726749-32.4966932019486273.1533616880283144.57785590707506-33.32375163278415-73.6759933814315120.8872980942828-25.73095287945995250.645735092697244117.61486299506952-25.432871135755338-14.5146117997188711.5047542886936109.2597790443050951.145623429501136-5.886668322037511null24.212583167414003107.75997286996176-78.01440322260069-36.551375794745546-208.30970783903626-89.5548045841247-11.55000043810942138.1809146565565911.03124415752615858.47329916289276212.03186825249801-79.7907113054889247.7866121926943137.85856611485303-320.3354494301346null100.14794238989678null
1193.660026932374437715.095789719410.57887568435427950.5536230798887514602-64.68563672126847-14.950512997453888-15.72340790576182185.6693747941552null-71.401956439614489.36080427114875-77.718157837365-3.8469694013016221.13915191364428443.9818925790987761.8416691547743879.1960034587785619.812706800696362.358233832171756-15.219874830637846272.56413772300334186.118612364643551.11004961737681647.9992683806744695null-61.868463517560244-13.89162873228789-48.76816202129029-123.36714807573117null-12.35994264486864639.07586445182439120.65031391022946129.98192093126423115.876090589443842.44105614555956424.07094716121875174.92082829063523173.58777632412978null220.204098621306639.65413841964755null
-263846.438935984862251678.55383271720.50933695712630230.48101475340348354679-2590.291689070392-4059.7187709964696686.0977252917683-140.93479266716324-1454.0941111393681430.6747646730339-60.057967666549615-656.54458512195872.4357647115488-635.922423574659256.74497031906794-333.804514022356509.00748501059917-636.120234922348829.185229925518478287.9261412928047474.49364464081006-592.500675794562138.43417294147037-558.7738356926156null-26.907470524325184-146.6312077483235119.52644402234395-802.9846734662495750.705291200254535.577995966328535795.457881044483-346.8581323066133-338.9204148021711434.91883921678576-53.975796766535396-40.96458078486021825.6423132940197652.7509762513384-1439.4374615524994null-371.28917272007661559.3943863547863
4044.558100357204226833.939661469420.61737423724651550.60465537532673211120-18.33710056515661831.541352899957218-13.99464579180585857.106376167142805null-47.2283000182365233.66659307406798-65.8306759955304821.40201616917002546.3452357071600668.92569917792453188.26596826686722-9.992078627668675-35.90663687172893.3860012781204154-143.91886747118378.64769877619648144.98502659220807-24.14022850853517714.920365822399603null-35.00695038972024547.422257579712294-6.1755598711497806-73.9184655924328459.4591759120248-91.8585257885176115.885560930695526-1.09040038161635520.604818011831117-1.1075864223272593137.7309991964705-33.64456903591675207.58847706895205-251.97203070117544-15.453482570426681null132.14487897008684-5.624342966347403
11039.8635349844923716.2209905001150.83938431665108280.825287727420991741024.484436665550056124.31503642031353-1.15512798305883-37.553238372566554null48.711510070391135107.5773739953951-96.23206701606146-61.32849366380584-34.319346879698934-17.43957445461286-4.290532016485312-167.3434797803511716.87470201605409469.2433975786761null55.5562026646891840.3335281980726440.47353677134998444.82330793028684null-24.5710599242089472.46506455070752-7.847408830123712-28.976933855321718146.0584674289057-17.665002186663116-66.2725198082576-101.89726611003604-63.22359000969339691.29494409499446159.9603388913829518.215803876292537185.561874969594null-145.30265858365541null-437.5496918329505null
-29325.6669289979664063.537995357540.50975682246917910.49181466096643645964368.02098579405873-169.00160005613728-7.958799525402349150.16300513757588-84.73167425639029-14.326475640146581-19.005652160864006-64.9388445029514450.77858889014614613.637131432018123344.17503773619126160.47797071338348253.46181258160442186.20157275994245-11.573857140787712null-16.71163670994703-297.66410899852013-44.27171063131728193.4325459911289null30.997638094400287235.07691689911252-65.6998083931402-135.5330945015223463.34415067151457-184.50190966904756-110.2593386586395276.879004405633941.19494081236701-1.2494231052377622-170.4737687733358529.994665216200058257.3462917735648null-8.517293932569864-47.73447832307698nullnull
92559.24825598963528643.62830436010.4502727761597980.4447769892692043373935.06419302769481257.968725626444-61.2232811036945127.438560087558596-77.77867874746664125.87970173446817-223.07938094506974-115.75316348006673-157.34632453542997-153.68373306734247135.7028828493121-313.27316176360211.3468059809229391-48.57915479449336107.56811247263936323.6778810916921-75.358716138840424.585270190348002237.12111439726405-17.336233813008985822.1184121575508176.3549077485661-17.788446155953284200.8758559092890413.357643064258996235.95788369423593-76.26081423255539148.2227552814872549.43845357746455-24.56435727511791226.10037087956388678.8096540662162118.6178134771407955.97787712178474787.4843293653789-41.83390121975815-67.32654818741617nullnull
16453.505092533036150688.659375308020.50361667846930060.49653953012272422562310.34418964199665249.4549931786867874.49233567147385-31.69703425286225null-12.16682381080922157.33309089630378-125.77436910095471217.13709546144764-138.41410978874282-54.211757352622584-119.62494092384581-175.77969343427895-24.71944997902630826.295376603315116188.6483607939574-115.02418945214264-49.0095969690413250.25481837225918156.22864115188804null-123.05484080199962-6.636129662881369-13.171810127073782-90.83875371032256-157.51195277012513530.0671208989812-77.76033314749358103.904109288341310.036071450211592131.64567564253576186.95202642409433-94.29152355754502376.6493976758226-230.8068613669354-284.8185824760315744.86029212331485null214.27883100731992
9668.06241221270385585.899042182510.63376594557032130.6078782993112588516-87.2256993704669867.60319298910349-67.79796305608309116.43729264076882131.46771654831286346.80564020959594-222.5238748870911588.77091616405022-96.46008028103017-92.3878079551370630.886439583090482147.98750995918795133.4361881064154410.42892048298133896.3654573329995325.6431015775536228.9826103445607107.8386282316019-6.48096670771393448.35674835515252-154.6028537351413-50.550505956193405-48.5324236520094-21.977610857017194105.854561069028null-93.89022895607897-99.69307955477058-139.766267647240963.03735236220817.651626271376036149.65594056219243-19.761643368800637259.7103990968094nullnullnull0.3448456984993125null
-61167.4659188523874242.40022575990.69382639197058780.5939871719609968123-303.66847592003603-610.5929872358149-88.70698645992778-140.28520335707634null-384.53796927407603108.99077374202825-30.843363733502056-79.08651390075497221.48034328541624364.932765935026-114.542059682348376.770874411807944-31.666810980707837-184.18274811921617nullnull441.12205179616035-34.07028407427322209.43239757104467null224.9283374030274-8.620649512490886-136.7726787665043-284.0859524581558null-180.10996426366006126.83550706360425-181.3528019794819132.54800013795915-81.26963290031074151.7469530974482370.54866532992531nullnullnull-224.4862517772138897.27759226885229null
-180006.84738826434592664.17999954240.59898943721545050.58372139787091928-1683.1995813740327-2002.130850480888151.8677352205958810.777584221400657376.13951520209-118.1293653188875723.477112911451908215.9650490588718195.17615713017096-70.39059222099891163.41864397869963-149.245054914803-72.48919216732278-272.050117586237685.74590284312059132.91207367196253241.39719124905534-333.30770483248032.58505778434858136.12896235054956null108.8616887442779615.5063168498915357.458357342583648-249.11224782743597null-96.83340031721718-149.77445114272209-295.71323476118613-93.11816232053809259.8798807967962182.79680276947120.4743602342892374581.5331269201703-409.84754346678676nullnull357.1205609313438null
-23257.59239568993584702.74651133010.49803397117206030.48965623494683742194-185.99342096382225-364.17219613921554-33.82125197114632464.38338089323014107.32362376641339-61.43562138640965-52.09488948516729-125.2731836232955277.5052149106373575.82517516008746.6190319805352120.1012711071601-100.68705731686772-45.304297945679540.66135508575728216.8051729163172304.16221458304125243.79690410298937117.68070718835003-7.6410786875973145null-26.93313713779452-105.14122901954904-14.65540312242747428.34465107934712362.59668246194059-59.98622356178887-63.48008156162611-17.0391167152594646.23461160596018152.04841786347387176.12743861908692-65.20633037270035259.5019027925151-618.8474848238963-176.89423955973055-271.4205302989947nullnull
6012.10702692580786079.918394700320.71843875306792260.6848953554189747311119.8220315576501587.04996143775608-120.91360981524281-82.8153983069178null-84.12208206650389-25.719034617921096-126.74359078848154-89.99150781114581-52.647423987936556null7.60413115423406969.63125294760033-131.54058793048935148.69458771388793133.94886018276154184.75962020964553-67.14243935858343-23.69221237546370873.57913440889925null68.11087854538195-197.10798303254734-106.3144260736546626.745247237841028null95.82123834942317-37.53471815291842-277.35995739390324109.85540365728865-4.193938088928431108.39457165501993-68.7144973433553294.5075020051257null16.08791012337649null-79.27035611565441205.36126532785812
-16995.75255315775687758.784189720490.52596207984795920.52366664691436077679194.955020341281-114.02816715960445-49.3129000286463456.045589136922976.43163754088013591.39987079923625-44.99237041765729-108.8783335270490877.19875210262927-20.7851975816958620.533373437391006181.7553868403797459.1240427509128839.49748899045819-3.733264998181936499.04608592959241134.1957385587588-5.935189595529877-41.948181952668155.966523037731125388.5981347396176-157.3578186804136-21.9053210326409522.75324610179111-32.27631502377056429.314990347780682-33.129418646347375-103.8892404362632535.009996789634926-0.599295913211563510.322892596851236120.32814774418037-92.81846103288413329.9119581438271-141.01346529728673-195.17350197807278-13.466307777521719nullnull
24895.940243424604352553.05507538560.4253314806058880.42152904220488225782-200.46418993331463221.84378575756338-219.43837974958353-2.609730427538231-944.9467973442524-44.6401625713533-77.32519088157575-136.7917717077677-37.867701053984675-31.6919076606311182.8900376233738128.6084337667442641.12241244716384116.71600059170937-31.141628065052497-161.51430963195412340.6215960427303-159.07893672404506181.4118932339581259.3544921947518841489.802270134057424.437849459816572-39.93280617408875121.82502692448486104.21154862264076324.8178881327685-124.336569589666173.64460411414917-62.68914872770554-103.86842345357242273.31839960227666380.785254299341166.31949691500238230.8263068464096549.0979090555306-70.72045749813721-152.739334086386null-1003.8367850951738
-608.796078544122818108.126409079240.78854624650043930.76594813543941752916.953038931083352-4.818250851030314-303.7659734637685571.88200311497046null28.918461349577534140.73108883500953null-4.1545191991286075110.6072131812134738.74595804465225-52.4094349057115-26.92922368437239-183.1257536563169-15.910475906885573-19.710554989365495209.55162259306223nullnull-53.13079342947595null-147.645106139962988.64426854747802210.78423034938848-734.2944201932002null-213.2597778260777-229.11847303960175nullnull271.397813993775832.64280668663080651.51212739298165248.75655656363386nullnull-45.65958305224388-436.00385788089653null
3996.328057402700319762.913584570210.88551002645893180.81354490023311755884.11446247209567.82335140325192null-104.8061739031535nullnull-133.7657970910105135.78825475787332-97.99840434408993null42.054965898294675null243.97431674876586null174.33161420062262841.4172223128471null-62.95980204715795nullnullnull136.30751281265836null-217.4752372341451nullnull-277.20576498331246482.710031506694328.021660929083453-89.79455970624075115.1198385174051135.20108298189241-177.6355387170641429.12623301843496nullnull-43.36512732172825nullnull
-3081.24395107432818040.596953704750.83032964710124080.73842487261441297575.757648382226560.34591090899183286null-169.3571637683754null50.01503855503857null-85.81541216303773-42.256653218339224-90.87499255627863-0.23425724075599821-87.93564474924786null421.5006626581512103.93497132707171-171.46237290404719nullnull138.8890765787913626.91821573903596null-152.962393494749648.4952675796356861.23764642594715102.9602401099396nullnull-434.857760400676426.84451687295537-58.0599553408297795.7195217350230269.0100957147837674.18688556792297153.39557068171067nullnullnull28.221591233589788null
14671.2567611702465680.318002956570.54602744615594160.53298630912781261326-118.09568489818473131.20632881799136-63.1553001608684795.17579462668866-69.11442893150173225.3186381762179256.57780279433908518.588683688195204-65.43175354828827-44.1672472218229212.0602608064361653.1783606703935630.91416131222941425.36466196816403624.242533160434554-3.0741777962923225-15.426658285991678109.49728329961675-26.1719558690236334.114051997594395-77.56781339080983-38.98599713343423-52.16267963269723-0.8318097454418678-51.18396270477531104.79702812959114-183.3405127355543448.383510100756375-82.63946704374918-62.57237813099958110.9889624377582135.3311795824774-21.969779040791664204.2020233049446null-362.0883849163261-175.64104319399772-34.85472661665536null
-30615.053050912626191282.29304386030.44742893491136280.444085884366327346320194.58853701807857-219.27263897411314-124.8280557549765-32.5298042569493-1029.137278786393131.51904945559323-24.19172540545493-109.43630130796986-1.1756584255734042-38.3299685013980181.4637249598595514.37313165210373217.02340563332121133.743129349206-8.85758536765605295.74327066855041208.759193706485717.56358232629016341.68905054718938140.923793182225721158.059306034650559.400877794339195-0.7777792568137267-73.27629492226032-101.444719168766641.54952145501563-174.241158701971830.76528311173076752.081485579696239.362368960955576302.42865566474995112.50325430040922-62.93045180814115429.26195802324986-326.8534221174483-319.30261540569785106.81704131731165null30.17979474318825
-7854.16912292187245150.0601276512460.80022008438457610.702368697144368674-136.05222240418541-126.84368810236342-701.9676039056918273.5475717378171nullnull300.8671009843997-396.45299943412994nullnull206.81063712580627-266.4090226240954null-514.2077251056334176.10389783112026null57.72779616907995483.6657644822021null-32.11288518715441nullnull204.74980872014223-226.18261476882117-200.3343809631995null194.27770385114633126.85903459578421nullnull-54.230232775992405207.1562019462489122.3889671273643null21.427362993212533156.5209878807991null312.07638928590205null
3032.0249267044746165738.233474085370.48910646600195050.472237339879373441096-24.19080325698979522.39213185560908-7.84528548214639170.76512245057583996.202558910742-112.01611929806975-86.40263795606204-51.449246227729255-9.00363513025544848.1990236244891819.094060268044057193.84242691100948-68.0312285959456-10.09590245267758861.69949566246301-294.5673585478298788.886660646891685.869822352860435.9735312396012-72.72970175125054null137.5996928825993633.49569284483767-54.26367716823941-214.90639354001826null-95.22944975852776-35.93676222939371118.682905278815253.27775644531166-126.49730931284297297.3065691412257-3.040757904128354202.14458562509282207.77154031510793177.30024899671227209.07701641345682nullnull
31257.96396741478101247.171857975030.49924270416756020.4733786257601459734-115.11448111704476329.7514798406049-1.387366035540982385.78257609992698-363.0232158785785559.4018246437148-50.0319110471260627.094624033213645-145.05254157216078-51.2513381938655441.38691462187158235.44690741773448131.0847971825296184.81277124996924-74.59319529226626null61.476758916369256-37.26694278452037156.07533981596146-20.55148865330696279.1585358498364-16.731182693235766158.34185485301737-85.3195916944877.866195854852455null-89.71085979936935-29.19134722346237129.168202172642195.1816622821747456.57761956134036229.913415380538346.571820140119768153.60187195578035119.0013254716772347.654157753268855355.32630294098095null1284.161675661168
6142.205357221062582658.652306490390.43241162649599230.416153829782313861222-40.6829151402235851.74555202993294-59.0571012470821190.8578022814816316.076963954064118.6847636396458965.2366272444654-200.3361815795781822.43509568024956466.9089516374852176.9168399424788-4.660180733599853-28.473950081707596184.6960127792197610.731625392976504null594.285319272210580.36897566971552-10.73342498209394-23.137224407738596null-145.17700938125429.4988802192557378.07899766380167361.97023313686005198.0649637597132-26.7038429487064456.9759192937284636.2402084565255444.715627990132369102.3755057639917889.26581810719998-96.07550741818837293.67890504174585null-161.36999344072842null-198.2520571323649null
-27996.50577452084343819.866783397530.68360931465260230.544841470201989483146.7669605398766-192.25802098155438null-37.913037716492056null3.7669269304666813-75.21681367798003266.4134428988489-195.06000159835386-350.7107940772106185.24702811016493nullnull79.03923012937774277.09417433352195286.58838372997013nullnull95.199692167772514.351158415801581nullnull-153.47462944516636null240.58204606353547null314.7810073184359-41.55035110499555null115.72541676238077-277.46363227975564300.112163541713323.041692688710867nullnull84.8837498277975213.10460655657775322.0993393971258null
187.793465153064366497.993246515150.68548145411230110.6712221936122111808-93.12639520198636-38.67280339515656-73.24849151812116-27.989679269085947null24.401812106082076371.10221938395034-231.24820672205706-41.75207460588194-72.20848311797378-143.00725438664253-79.14068772980686249.1884505429181-50.62987219901378-34.90749414998128-201.8413113479131252.80864194755043-129.38867588473056-166.10350128952552-132.01158490205282null-171.613990020890557.83847224259303-55.8176226113084744.57385137274664null-104.63388845083337-56.780055780624004-23.195150266464363-30.04843742249059336.76948344087037108.73136840647882-148.3012013228779303.50508481220356-116.62402372341943null-147.26220450675828-103.79772991830163711.4694095566483
8025.50277618811131064.384277271230.72082419745146620.6917617322019343351-145.029850928963118.59697111344075-102.4428691428858198.73401267121268null119.9408251023919383.59416000930153-55.38035695224312-102.94935571500281-51.3137576000024878.50379422980483365.1318354315521-60.51710351349428-134.412831105179275.3547980780801844.135094079948516120.4034602402705-27.20745233099757250.52429902485757-15.077127543839264null-34.509234870656840.21654648401348697-4.58741949999469848.07732668923542-97.65123666035034-65.743603717263649.13222321362212nullnull38.123771298142046113.851402449200042.925831731633209210.9051232948858nullnull11.979695713433555-0.0908403462097946759.840409228917885
9303.281001631884342705.125375785170.46190591029761840.44901742910714226154013.487314309174867110.54062020297023-275.7685583341575156.5414335937323null268.28007769363614-248.54029046984655-272.4094797197432-238.53181882052982-0.4881211054426734142.3508718147101-9.016692229864113267.709550454709638.10235718229619115.55059032537551164.4733904627463484.788943441959147.62183546356746275.577898451678550.630687434581348852.963754151981256-15.56572065913419785.62588261701696-9.551233473954115-23.49735139812597179.83071187737795-86.36487173697863-19.719326119686357-408.49469195532237null20.835756192426043219.582367530737747.38869189179017326.97264638366727-77.19644581155485-26.787219493334995null63.9419604075221269.7399603984487
-9259.24979741600923268.8713129068750.7511025126595150.7365329036444622652211.114907779793-23.92897072224075-63.3806418426928117.16780182569347251.50523713294965-125.134448107478422.8737975041749-48.65801123510177-36.4928969415566366.87061971754764138.9355733386576694.02102096352023-2.407818708920105556.08485206255919-34.177350476518742.1821815843720676-78.48956105797069-1.43545810901044015.668773525287247-45.628011172098454null-29.56055481538751244.6047431745969-10.701782474427814113.84189175222005129.2403859232075-20.818652515642956-1.1191985881132127-109.42262363785515-55.1494263243247493.1094192868917436.868457410908796-22.160822253174317277.61415416317163-222.1725873572515351.400069980289985null-53.745017820745836null
-30332.0327282972582076224.077897730.10962184253729790.08241164417680781215264.6322748719414-188.1981602521593473.14077055465641-101.14640249643284-217.85324086477013195.42872700742114-34.842570866614466-238.04744296733313-18.368029215260925580.316439634901-124.03422938626666-252.26796556217988-185.8134273835693-154.8893476265259895.0380917912986-82.451183159893-563.4620890587103-275.18644543714623100.77990522323705-320.7531309598197null-370.7460987985992321.468518542995447220.0831305505960849.490771787953406356.82787386107543131.74582046622928-28.876448453122922-170.93854623051132-36.566119406681715190.81851333435776269.2588334924233-473.74524413591814475.7700574842494null143.2747105967998231.659849642138436-266.38856949071345null
-6458.893928382216541091.533924443220.73036918649410170.692181131362364926731.27284644761019-57.55636469147043598.4710245044980387.74375390246922null-111.6656286316492118.5042261315421-2.602048306492801330.9436795388657202.98194084846907-34.296747120315615280.17423412880987-111.2244068198239421.44949018167349-134.0888478284385null74.69840964717638-146.7159757694125229.94998222589853417.38282228856936null1.314432057731111175.950465669525246.197849419984682-104.52331731289512null-129.9531594192527819.392350480097033-80.37630076977145-148.29534857931515100.8295178786559999.56754523585414-7.408181543644204212.00360346566777-136.530856191171332.65715690208003null66.94679306137748null
-28857.9741866340812700.6280440169720.83286989593346980.8056626696900813201460.3328095463925-112.3671717534905-7.9995189937030663.683486682487null204.90999835812846nullnull-76.71625472243852-120.9313648602805121.05601377686887null17.33785365187421655.27983356442075589.03907635023197-15.9958528894049851.72138121057969-57.58365533006031null-33.679257079332395null16.737879796711727-14.8348652016683-28.93124662859309null-13.69087904075076-35.22751443228346138.4012693209598-34.94317596056046-36.191459845492830.28774902390463165.98352645448347157.01594115184326nullnullnull231.9714351375765254.26625745753842null
29927.57684826570867998.788696501720.59239160334883920.5534627115338407391-129.2572778404323310.33847603808897-9.27941557266842753.518205457220226252.72025770568345135.4988159090799840.44308065477281-87.21958961346094-84.775683803825460.41768908448507661.89928578216759534.64847510894746142.89384392273843144.0632104591275-32.39694757394898-41.70465463084083217.37539606057874-125.87817011993006-107.45184437043918227.875079342098null107.40916804558208-48.06299532554152-75.75435470033362-315.83709879125473-357.85691748209706-190.9063395344875625.276423494439065-39.22743267365039null-188.11125844724276316.7382409285482490.51959394687195nullnull-37.090035063242325108.1541762819505-175.39888517637593null
6747.194051885221368750.73466121160.3948578915021640.26555402216501955143-274.2424273812633-78.8099587658808null23.23074453017479null-288.06189596435627nullnull-437.0392939306644161.8885877752311455.177922629179534null-270.86243726322164828.2079231407806-36.509993727885885nullnull-370.06686387998224692.4028723644346-45.53583899811734null148.78872311994522-140.8373931737015-272.9704898646601nullnull395.5944204558573-269.6617452316264942.315731663709519.593554664592265178.92381540725296454.294232821492274.2498174108291-76.82936750018803nullnull-187.43701408411886nullnull
20404.82528712840655978.1219671941650.57883270669568740.54066442073998413508.5715003314492246.8641014797283-32.11794070249851-15.474037206991722null-2.884265254750753514.025558354386042-134.02994120859384-137.92731295699326-118.479415586598487.7467586464894-86.7115006175985551.24317401778327106.20274386297797101.02052642704834null-93.96144970536378null-123.40726219723115106.80666301972413null1.6805930026255604-39.048757322046542.1609984550504297.9093158611379null-63.649470001229396.1563090801360785null-275.79720983142-73.62274338360383-37.4313960836309454.647156541792114268.2711872532876nullnull-9.821855083422458nullnull
-14855.705798025252260002.560840183840.368463202827344240.33761226733327776731101.0267606132773-138.43077668323377-162.42215355923585147.35409205985917-253.696618419684213.410653299578506-93.77923761106112-160.5183910346149-14.3718579541665314.209871807437507193.1132251706099267.5829390586237538.67802380456895122.3092384376552126.0179159791271176.51100569450688488.1036211464090711.8824325242982275.5113159022483442.662405114584196null5.735524862540895-121.3859465548952410.53109977667412293.8388907228085null-85.37097861486075-28.264692942833506-216.37065628948625null-269.2785009873948169.14587207039395-205.7058580281176321.40675731673866-258.44862491881497null-135.6295990377631778.5230945519762null
-2601.160331479661111943.276771278280.62593117971845950.6009005523390255575-14.420048358826417-38.049162118015516-121.675188094904367.23682378601418-140.77980352683454216.23810534222946-28.413481447501542-251.5473750209607-31.87419839189405-64.63383997173713236.02173824695123-63.2981404851901490.64035107832916-59.63943547944660454.22225516652175547.445471860762204-89.40957080081249126.51752895995408-44.62123575525948.705121595595221null41.41920128594119-42.46839719139744108.4161223638913282.42634596507212-22.41081620872102-130.32610005850083-16.274998290328966-266.39410690203067null104.99085957230302275.77253485230237.79752357143113232.34654477428998-331.6109677118007null-57.026621491833049.293048677031786-80.77129599340417
-2291.0158803347786148537.65995122830.51643473409329040.49145862279825236734-149.8063250624696-116.40532041088544-68.5095730100327414.412605642678693null-16.10131066688136-84.99703726746205-139.1701484278224821.818564091418676-75.62080650670337.137292628352109191.80616444223625311.98307437968157-117.04508103552321202.27898746063119-157.4284140089588411.565568415553402-257.1695170431812-65.41686609928874-92.56326026882023-156.6126667614563144.0039092047170590.92887052290435-46.89370173044498-141.5106588380121834.5463792001327269.80304416058198-6.583527272331717-22.0493484989876351.8897807835727453.55369119515012564.8974670664126-22.9599336820545247.9429824949967380.57316947543049-134.3967841018082157.40503009786002nullnull
-2432.25994726730549270.557248734660.54230486470311510.53494247646777652275276.676112455057796.36471878566107-115.390510690492477.0911830291169835.8389167576299816.312517684106698-50.36465339390301-117.16305087328762111.78901090671006-70.129252495677558.025009284699125-29.41602166434913511.29261640969005149.36695292728671188.1598902410111479.6703304487409122.5604179763278165.31192797108626356.67529311628662-3.4919146507766095939.5122818216962null-9.83193405810260631.759695233419453-172.20755589283388-18.671965597779625-132.997061805827451.53356294155423-81.74152620331348-11.23398318564735631.143499740408778132.95987225061214-45.88474628275896238.85011351894985406.74908165487767-253.29246868654428null-85.85698032225null
25527.4250891252869094.350791540290.58340530227177360.57470033843864641711170.9589128019065276.25276311801804-29.9467840753190834.73327698061576-27.948216296020018-38.16146107689781-36.07678072582445-49.321353408116906-82.003706595156418.66477223325428655.3628179987887443.2487860788582978.9602345267822435.2201598695948121.04463615947319398.52219205158589121.71420732376222-54.3390977571265999.0482318390596444.89897324770768null5.83743042639542248.8017100735779813.175859431233711-42.88865275733101-485.44159943513904-132.29773171008281-57.27322231078925-39.298150359409290.9248955672491832-63.28265400121269102.45295827011324-83.56320249186558311.8197996386219nullnull138.59710057919398447.6277669359927null
167884.62003144095400499.577374808840.59698587024537140.5794222391488799887-688.62190552785241864.4198719279639-68.23337572386403187.16875674327116-83.7799029807099596.74602203598529-194.6970989907564473.83417577897073-184.86846069142234-70.5966955646131639.267113356693336-274.24614257925475-72.09830659602031184.81369158985706-116.70755355831218373.9630830426849433.26523303488904231.34221043915005204.1902346481674135.23484490125733-270.4495200328214153.53371972182623-80.8391252551126556.9884058554345573.25883379788755143.39321292349723-132.06658395210087121.31170490320582131.55491037447345-140.72489989012698-62.66854584635599108.90042180082206-204.34146278473807713.4132993153821413.14657610857745null73.34159383213772319.75381978573665null
4937.383071239795100476.013384000750.450372240577627040.446130224078121864832170.6437645639239122.54988036247963-18.1742758512611356.40615756534723-50.5306756866171347.8633083937715425.261975627810422-179.10255591300768-50.0261344241214714.37738279914465732.659312572889334-0.399252744257557942.48617420119763570.33608873285671.09454076576341150.4624600514075290.825400632767759.041232923589121.4178823765833243.6652584044308786.460174622933-110.26215103794065-43.9767339695830946.111898261643549-29.07183612578812892.09806108292577-0.4471237215202691-17.39084473403107126.3319322206008261.34325083535418191.49873036766218178.54277904414465-66.26158769238145159.9527566871785279.9332331435385-89.14033462104992340.8773104136138nullnull
31020.13110233372642440.4286923505860.7094878513009920.6866466012804312440-407.4555791392279121.4880814080857220.50507628028511743.065525321979464null-184.26722874463692null-133.1431321175204574.59053746407422202.1352109171107359.7077879053590256.8380340439088640.879579195664384-4.55171899926634-238.70851140852602-42.1155412076812390.11256619041974145.0125808782271422.6658521660585495.11558597297777null-125.5502652661644352.7025693334931132.46860248745267-33.512875278401445476.6792348976092-79.18337942584824-74.14179082164429-41.266969193672947.71629927127575143.9445875102196181.36426988211096-69.65428527501658274.91322221891005nullnull-131.58666470357792nullnull
23784.451240675879471.321273935030.457422870985222050.4268551454069247676-374.8550728986198121.40864380007741-68.1068678054876104.3832347897971369.3908096488636162.05274477649988-11.466977469013521-66.60701925534991129.32341494795483-39.32678866353477119.94082848795465106.9690978355410243.09237294360005441.8852989991237859.05882460337096159.63364757063425384.25336043952893-16.41029959655362440.31126699696183-30.429423791298905-66.0320486628683-112.622703991293434.9002279906796105212.74618285789907-416.66606879794233773.3933515881959-214.14077640614667-45.08991189057026null-372.0160055186541-41.75808337805402-38.69267497789450558.11990111768103278.4502508351791-13.026154595912175115.18705521926314null-119.84410677488569null
60357.7671046768836362.9423382804160.73090451683131730.7097989887396559441-274.66027902830257521.6891344254975-54.2616469965427811.034705582414645null29.746995286258873-53.17608545372655-67.05957263955672132.0728907007528-60.87638871729156131.50090339159988203.1382035690564106.13602494627727424.051368217131731.63802009215771null-5.48745908559665654.7266696059491249.1938206099524111.70449381918253null-232.05808392003297-35.29785012450751-136.5330688888815337.1369692355582null-36.00742867113516-56.229643263592855-72.14878955553604-37.06240287958022695.2914975698370260.82970551820889-27.68271656075728272.162663366017null94.87003653214806null144.251665498647null
-1218.57466620571919718.506549075040.77130021695270770.7471517926557887357-72.257713081173-49.85929842719285-211.2636942455311-27.57997808055253null-75.50173620084098null-161.8201240885745247.30863104595183.396215514428742512.811888479920523-251.79232353145417-35.2864002911817753.458748174745313.560051694389374642.4574680016814-184.83947729446714null162.10419270780184-39.632127925263454null-66.342839447075-15.07518088333041119.491971695840036-75.4654297663683991.39181298779314-28.4997758728612942.2453915091989816.88502312822295-12.11655999420528272.19954098510723162.49358466930883-42.516484709097085219.19559923220302-431.836434725115-328.1383711936839-234.51852042599768-484.60753138631475null
-1378.1015106578755122978.5809373010.467804405904162950.459643926425693852451-424.92037212084466-166.09303023119978-39.3446929904194485.82114306500866137.187935418715110.96311999510799-103.06958727206026-134.33114731778920.03754652712954617.43853041092788-5.6954214088537359.198049953451722.67177067017284-25.07504810087769472.06442613651546368.0560195813854477.2241456827227879.80001453956531114.9564207928368516.049082514063464null-175.3554727297124326.39569787591767-54.6905642088644941.598407891408218745.1193831073277756.70836201347351-40.505241455102336107.8022133937498236.81130668296495-32.3028529483576769.55812502071106-68.13543982064049345.3918749198147-79.28532410007654-118.15562414778408null-150.26170456575025376.51090976345085
-687.286417685177427201.901071827710.80320811741026190.7528293954672889158-157.1609634927663-64.5709715669349653.1828527443725794.88211000107712null-390.8308075788937-13.571367790820226-22.536596270165354128.97695201900763102.0597683527586724.183318504092234null16.355487710718418127.8690987758701943.25431752571654487.86105368654177-109.58681606903335-63.92797649347925-170.52296683122148-40.837336079702474342.8793868253843null65.36478574269599209.12058248849124-122.27793574233841null-19.931488728605075-223.205528736551670.92213070942245-34.47384307137745237.7422004559916577.78391504120353-123.50343753584625371.0565569736201null-151.67895990788674null9.405955807434907null
6938.6661232880315116215.76576829820.57393998994563590.56414246330050751647433.09647125593057316.3815557253819-32.463561361264865126.51052626120192-59.37678768543518-149.40838812609778-80.91252745408286-159.28031263162242-118.91828803480813-102.46886939517145106.44041803753595178.717413375407593.91654193648633-12.751590569550217-118.41420280450367653.6836331076154440.1067770401045249.35439935339642237.51330903241373.06687039846894-114.55340526512461126.70745597710079-29.94933588036966354.1631828978162711.610120114083086169.15082423494735-138.2433932820806681.17005363532591-47.12990461009268-13.455829605348958-78.06494054824304282.476932772018819.716827585889764173.76732327955258169.34473757883217223.79420517240726201.26715497237134nullnull
-15502.32561912681273901.552300786770.52310142254394630.49692153030971187712-133.13596994274-221.03936406915165-51.49252642924507599.14055209001363-1034.8334907260462250.28890914675108-2.7377650507416154-91.41008628592331-131.69652215654483-2.6122259300146586-56.00075748154285-271.896784140570762.096651929003365102.4968069880712846.32264956525707-6.015310276357605590.259406380855750.77336767516402-91.3301804505636375.58735122621697null4.071085238559591-137.71920828588088-16.29400819539662-58.187780005465214-114.84589448120143-96.7137435912915-13.699948287770393-46.12521419709434-38.39750199423236102.56655240918691202.23119475753825-95.85116982935602279.241239905932163.7004495062206666-92.07231422603667null-369.4427400540135-304.83511629067397
24542.2860572340256397.752087202640.52446114984969790.517392859756629924594.860278270571332318.29666047084913-187.224295527976126.155215757094574-5.47297604400991797.07690900153023-37.33216019527834-19.029714765191045-109.87715332391849-7.462968691484177-63.235360605069253.389379848847898-23.303153650402393-3.703302356993638556.69241197676432-115.4472630835471392.0575176019072828.94525050253587168.858886620628876.90112267797271null12.303122094379903-77.0118420016598-85.179497498476443.18314893608800568.67569501925516-52.220395222384423.824975442287383431.7917979935267789.751608656537579107.64133410796138200.08908263234014-88.0168333214913215.7126624858332-149.93201866346394-395.50781920954523-29.495660316043043nullnull
1605.392072416239256407.328007476060.52191455487285840.51093490226167071560-130.3244265800866-27.23076832926678-231.4158274744667433.6247526212210468.65401911888837103.3788133818211-7.973917437881832-74.35954041069232-3.3346304401337963-23.8232313977700786.69141339391138-54.3687106882717358.3420718588214781.262932496206454.63957745275495114.80749409786476319.24648549856903-4.117967413695569566.4687929964945927.079133628478424null-20.737443432206224-48.102842150248190.4727388274655018-12.87177375430519null-33.1319047993576812.685999793389213-56.697906753193834null70.1370805566009867.70451540287294-47.710807911575756276.0002047793718null-25.422334338912883-55.969687032532086-28.819447499631895227.30320594740968
-10373.19012763601555803.1033102320.63507979786139980.62953904405414982408-70.64114247273318-116.10922041878504-128.673523397755286.37822457119202-12.250470123895829-6.14969845060051726.457384545212516-78.44746665585842-114.1906196863163818.5961794129556442.34643731460203-48.64985399162762-17.3274503643672841.878531576612262324.724105136680485183.77588583653144279.3546192364377772.85200132778102-5.865034312084763-63.855095114385314null-39.06335244599379-19.66543350584555262.76846713323136-16.750684228155798100.74220215092244-94.7689870036347840.260651774319406-75.73573441330893-55.89322743292430641.6856463360045373.16769508983752-68.97732904460754277.824648006033863.6487324302633168.84088325646371null74.24552987159284null
4288.82698452172816377.9750123025790.67909595281507040.6620783139492029696-27.8482729242364333.7882039844869636.3935998779630435.76378513147984null9.700567211347646-69.56759716725806-10.45860383288184-76.77966045776316-47.438668900023046-8.823850512396776-25.45592234073411614.291899479913427-36.41841237761906113.3217965612639414.033142011078478174.128389224558-10.46158052158916442.03665731277822-35.595036672693645null23.1281144361365070.1209943700157065518.143211155784705-55.13210333835353195.31093568666887-40.0330700732233257.66550685745028-12.8168485119203094.05201559311122647.7590003941043328.741972261839113-53.265773909818705262.5440112581235-371.888571721077146.497213583388266null-43.72098949432198null
-2837.7957363372025144617.806672055540.47568209998637090.47335397656962428597-328.58498714006083-211.94428046777253-93.825475871317249.044461001979593-40.360438935848855101.12000038740449-69.5173460087785-161.179705374743673.15402590693054-32.5562702031938959.6794094006188589.95995732449444-0.3222003674096389167.1695901087543339.130468707915476292.10748963502897111.2901017843649982.6914054322254482.722742882709492.51881497947652251.43468737052171-71.60449498954856-19.19940901667895318.87234027106648886.79986869653806187.9067144806307-77.49264895119924-6.8720802268713573.8509830247395335.86056235553572113.85445806916508254.08420032783633-48.44161409282177241.23985895351896-177.41893328178642101.18683574951795115.66595989948824null510.427966359338
-172.6788269512451338088.047466690370.47345495977938210.46100872113989281560-18.79964242023619-14.591082680506041-53.6699175244517449.68304003459362-78.8490006987192140.92630232228785-3.793474692952554-80.19833514764339-77.0659366690544-44.2172675799745729.4227930679885452.3868041060422527.841768649396144-26.241876012774927-19.53588119195603223.07902645261925241.28504837779684106.0868312822518252.4907116443111752.16207128914166null23.22949464167863-17.084281934654303-37.56984045329461-57.08342848539979171.8942821332229644.18459085656508-50.1642449435411615.134044546735549-20.6356800341486156.9724198854166282.44824688876417-46.349264001783474199.71886794297023-141.7249319038696220.11280012432865-8.398322327764385nullnull
531.853839709857842665.184948073050.74606994670490430.7234983864120069442-119.72583188648206-45.42629808673888-79.8176957329507578.5310738283855398.6272733866532148.17592575874752539.48117713255342-272.3412270695344-68.90812987544048null-25.559077298202713-115.76749827848703-12.897698370064854-13.21049860044509486.06067986524096215.02466796487198181.02540948075116-339.1995663520371170.70064239110496142.95218961055306814.8337391965733143.47766112175503-77.63515288083543-45.545854148834735-238.71730046075288449.0585381698006-114.41320014742114138.7510776021374-145.2156979410017-75.707367889133759.75743477659415113.9266877299749-145.94510484142248278.7523339157194-82.07873871998311null-28.04415741009968-290.68124748603356null
-15134.758273455582181533.821976842970.485316697512662150.47280582722937781518382.92708156333045.484436794120029-22.87170872712219748.41557395252503null-112.3511383212944467.42247789367626-93.88944408862832-181.189102106938638.2920494413370961.93730158597654104.45877487343377-202.3278249418336-225.83820660749578-168.05314559589883271.95393379164506-314.353936349045622.262385741723296270.1114259823257223.1196132629584494.2378006680690184.87180909776973-17.17418460851824-279.6866046445716335.85312223437825174.81851717878155-170.748883863749417.80890234086523170.1711470039743783.2469184837370594.07382995535357499.76568464441146-147.05691004381092133.92794931878126null-52.2384590720169191.99580463184512null126.95983685398055
4876.4276709271661121.113668526510.5180204971939710.5092435551553597195846.63812883782118572.16375986546825-47.914561703586934122.35870554028686203.9399089362220399.235269528297126.7596349292657-68.01778655709771-65.20407818204917-28.99537612843944220.06740009921409836.698861384459086-10.65356107687924881.933411562932990.529008900392065743.74401539541504207.3769689598104-23.9985215356301329.5212745389543325.752946938640573nullnull-22.5082260235883-71.178820797655776.793034892766783-0.2742765824062451-84.37499919476795-19.51596955606327125.56654237823227.423620180164447510.01220738580800390.04746451318489-35.1029015676457186.7072139116046-93.40607115737626-140.20687809295217null-176.34083216244272null
-61367.340321044416226826.974212450030.64033853520025240.63034022420968031332-339.46633960336595-623.5007237413307-85.38941233412557147.71915728155594-50.4156792319851210.059676324963624.129917693771457-162.91439374005094-253.4216002281301-81.06448708802644181.22336847895716-62.19866739403955-45.1343512936230847.35188471951733-36.67493535515597412.42690784982676107.7895215601320348.8111660613131512.324873547711164-36.00688471450863null27.78042607683597680.243971207448957.0022297452094735-52.5637447449025278.17869454064487-64.54349668301008-30.627746248458074204.49675768930177110.37515642935973149.71538415894096161.9966476944709-38.67742278379611508.0767292001992null-173.3279020676866215.0450925848481179.1421196737831null
-13653.89997226064669144.510261269420.58628163337361570.57458523367093121274-20.258361623408565-180.1088412412601-66.157071480393874.38288298363022204.363911680301465.70848267741255-53.23517297805994-74.7742033192842-38.71131776715632423.41452627400873673.3772123589503525.87337336267419-22.63837655599944-3.89533473580380317.7913742516633180.4242818985817596.17894182574993-136.0385105844647235.35342605823043-0.37598515257656095null-27.710982775594015-23.26636635907453581.20326375211562129.66483719439648null-49.76831488555822691.6522707823840220.487481217252230.377905334757924-196.22674788677847106.21917665328151-11.526678268032422246.20941878975455120.79406634189404113.01840466849798null-294.25476230760205null
-17673.73363666568139154.963674937640.74234205102477220.73645272647676711612-911.3735703593856-415.074850060885333.48803408530418100.30703896879373176.7160164939457185.90783581026926-57.25558802318491-70.325366550166638.4866469027606-66.2463078076392476.21406649005478-72.3808338526705541.939706460253044.064755505688226138.7081304427151117.5247584740127384.94161973456893-180.0128367026364637.09333201882134100.49398627449756null-44.47660073491077555.5560818279685348.51250037855895-132.334306850423629.154010922734056-52.44411739272679-57.92427579192688-26.7214086273621-36.04759357496705188.721565068664883.6446077146288623.598758360220597510.5763414635938567.404144313433256.973073856393306168.268367020253nullnull
4477.67259145052149859.679907025230.57110072995899950.4655255250258301163-100.3447664467603712.61544802449006482.02000370256117-27.10108762018896null47.118351350544984-14.3913879527033-243.92253384649976-27.1025339216738076.206293678769761138.22343472421124-26.0821472958414366.0418551727642-186.54030739835332-8.043419055072159.362647461080947-34.01172397816503-63.1529115619126151.3727374285695224.476048442435616nullnull1.400483319259911310.653188780709298-70.705429124638858.340498701916495-46.3564393401468853.30631548279428-183.42745721838094null50.28759399183382130.24453943821578null153.41914708451742null-74.8240783262807714.876479275033747-51.86152475249419null
-17796.75557097264875324.159482426110.396236239096054770.38942046757126893226218.258813300685-129.8974248227205-78.7415181408547238.51478684455529630.45613577795082325.608229683216287-40.89740797396682-55.1713074742079429.632254017572805-36.46423533617386165.4035100852390279.2766404475453399.2730470406602742.659168318315025-34.497868138519null210.77244498781243-75.294908212500771.5768681602608607-9.369575530280603136.95522135443386-14.541033959779405-61.855680889584114-5.095606284061452116.63613449061317176.44140731138828-66.277140260366611.222987457887968562.2296516976703536.8362668359997263.67644101081646138.48381483302032-26.681383386251042170.543746583758139.99834206550906-5.65141335105205-63.02614199373995nullnull
4225.56814834963468501.11095313930.68923525961002430.5769106546497922114-115.78140930887115-16.119601810277786-177.4061738214088-45.56333760095703null272.9344681915783202.88703144637648-280.35002979660436-18.297040742115847-213.695627796459898.58971228718464434.85532488807286-119.64700438364623-18.674486463537054336.12727015860344-305.35772408556795218.06094740197162null518.368322165240287.7093964834514null56.05538975068374-22.001299658290584-134.11280526954633-6.170532440938332null-110.0557217118616452.60412021298848nullnull-29.71047161373933265.25448384685797-82.10602912866803192.02420666466563nullnull-41.66416780486817-51.36439257210554null
14489.84394294450644403.392391609850.72400332567433930.6831149294779452249-421.6078808114914null8.8212504084948624.676629128972333175.87424773222287-62.13738443278895-72.64493154701309-49.129911144411125327.9970432383937-118.81105726558557377.4559680804381273.248555724756445.222777862381211177.46532146265898-12.45944008850763685.72211284978218157.97962067868832null145.6432374398857-0.8957433865230466null-251.11271627421877-72.85812457168069168.984759133821546.634720420981287null-105.25149570514598-57.35236927355806259.14623618417204-31.125307986679974354.6434709699127368.0931838203561623.55273534597773nullnull-252.3530270240893322.233816922944147706.9089427375208null
-9060.405647152316117856.241995368120.57903278274835430.559259291703159684862.160825993898634-81.597665320059766.724394556225381586.57696067207443-262.0510740035791104.80203467849974120.66392308301263-128.63922137892158-253.8090520278561522.53684029896325296.49041635330444-93.90876514870905-24.41121482062129650.37736038321856-0.8702088565205505-72.29791420124165230.322871360889191.3506705142803122.74084851804222429.779316900826284-90.57239624713412.68706215268591-13.131889311912904-52.1545935791830360.52747530589897556.1296925744804-1.0285750805309892-109.22205211706536300.463898577104651.987184279614965-232.2923732844803297.0362268754580648.756397355362594156.3890402444472782.90727467388628-250.9409790689037null432.43813910982954247.58773320878964
-26254.83874543053441348.979636892740.64666159908462670.6335307801316905922490.3022145050106-124.44344358206787-106.139867067851744.71013240715845null93.19115253289321-67.77514734881598-83.30583206022082-60.58482323782481469.50758109001936-14.06525634702190621.73978245955124757.6655460530596-2.731415878147764-10.828680552692335null268.6634651080263-25.09820742467662736.86411458889938-5.082540101389318null-52.6224508433759436.96180352822588-32.942442144815104-139.07203820152418-202.73001407680184-99.92028229130256-35.18798955518171-726.9695265139991null-43.22507622541276139.4809345823902-20.8397168651989197.9016976046960453.77057795986393null71.5187105553649365.49092452557295null
In [ ]:
# Optional: Save to CSV
combined_results.to_csv('combined_results_by_cluster.csv')
In [ ]:
combined_results.describe().to_csv('combined_described.csv')
combined_results.describe()
Out[ ]:
intercept mse r2 adjusted_r2 row_count latitude longitude has_Tennis has_Parking has_Alarm has_TV has_Clubhouse has_Playground has_Refrigerator has_Cable_or_Satellite has_Unknown has_Gated has_Pool has_Wood_Floors has_Internet_Access has_View has_Elevator has_Hot_Tub has_Gym has_Storage has_Doorman has_Dishwasher has_Washer_Dryer has_Patio/Deck has_Garbage_Disposal has_Luxury has_AC has_Fireplace has_photo_no has_photo_yes pets_allowed_Yes bathrooms bedrooms scaled_square_feet week_1 week_2 week_4 week_3 has_Golf
count 69.000000 6.900000e+01 69.000000 69.000000 69.000000 69.000000 68.000000 65.000000 69.000000 41.000000 67.000000 64.000000 66.000000 68.000000 66.000000 68.000000 64.000000 66.000000 68.000000 69.000000 57.000000 64.000000 62.000000 65.000000 68.000000 21.000000 63.000000 68.000000 68.000000 66.000000 46.000000 68.000000 69.000000 62.000000 58.000000 69.000000 69.000000 68.000000 63.000000 42.000000 49.000000 45.000000 46.000000 17.000000
mean -5635.192481 1.933318e+05 0.593290 0.565340 1442.275362 -103.502490 -76.985637 -54.019427 41.746411 94.786286 43.185032 -8.151864 -100.627669 -33.912791 -20.366491 74.743513 10.056554 31.316436 22.051760 48.704407 110.583373 161.483534 -6.254724 66.239768 18.708088 400.255311 -16.044055 -3.314672 -6.296264 -52.751475 125.262650 -53.041872 74.196465 -14.449171 -17.594195 60.712411 175.752144 -33.126419 286.244631 -13.416755 -108.186387 13.363597 13.209264 260.479285
std 51299.270208 4.184157e+05 0.138231 0.133029 1750.669921 439.870653 658.589008 149.864074 77.930061 1236.716968 161.483473 105.093455 128.948385 117.935721 141.007635 97.659861 160.345353 125.869482 187.254806 143.561311 217.606344 226.482178 177.993273 135.767728 111.562116 582.868323 111.820267 79.384736 102.220587 185.351045 246.521291 130.539638 707.942603 205.019233 96.288544 132.087456 156.405096 102.850047 165.031000 286.111415 263.170597 160.441376 274.773274 572.135298
min -263846.438936 1.270063e+04 0.109622 0.082412 58.000000 -2590.291689 -4059.718771 -701.967604 -169.357164 -1454.094111 -390.830808 -248.540290 -656.544585 -437.039294 -635.922424 -143.007254 -333.804514 -270.862437 -636.120235 -238.708511 -305.357724 -563.462089 -592.500676 -170.522967 -558.773836 -270.449520 -370.746099 -197.107983 -279.686605 -802.984673 -485.441599 -277.205765 -434.857760 -726.969527 -372.016006 -277.463632 -170.473769 -473.745244 -76.829368 -618.847485 -1439.437462 -371.165253 -484.607531 -1003.836785
25% -14855.705798 4.266518e+04 0.499243 0.473379 391.000000 -157.160963 -132.030763 -93.825476 4.676629 -84.731674 -41.400812 -61.987263 -138.575554 -91.608651 -70.545170 19.824065 -74.070797 -24.134200 -32.726767 -12.459440 -15.219875 54.597497 -66.338824 1.110050 -24.960274 -77.567813 -64.105651 -42.845481 -54.370399 -117.839281 13.543877 -116.894043 -56.229643 -81.400220 -43.935488 -1.249423 89.265818 -72.188392 200.931727 -170.547205 -176.894240 -63.026142 -142.657305 30.179795
50% -172.678827 6.909435e+04 0.583405 0.559259 808.000000 -40.682915 -20.024286 -53.669918 49.683040 -12.250470 28.918461 -21.598689 -89.314838 -37.180299 -32.124089 58.212190 10.988631 20.004812 20.631098 29.185230 87.861054 146.087680 1.574906 40.473537 15.484724 251.434687 -15.565721 -13.511759 -5.635583 -29.024385 96.420132 -78.338014 -13.699948 -14.927983 -0.110695 58.473299 139.480935 -30.663643 257.346292 -45.111300 -52.238459 -8.398322 0.127003 214.278831
75% 8025.502776 1.391550e+05 0.693826 0.671222 1612.000000 84.114462 99.908694 -7.845285 86.576961 131.467717 112.371430 35.120239 -52.379762 29.960110 20.520557 123.667236 96.630459 68.733904 87.074260 89.039076 216.805173 269.638633 85.075218 114.956421 74.081189 814.833739 36.208420 38.872538 31.936922 47.201458 193.459880 -28.050793 48.383510 68.944620 36.573621 115.876091 219.582368 23.564241 307.662442 109.977813 47.654158 115.665960 124.145645 376.510910
max 167884.620031 2.251679e+06 0.885510 0.825288 8597.000000 490.302215 1864.419872 686.097725 273.547572 7376.139515 559.401825 371.102219 266.413443 327.997043 580.316440 377.455968 434.855325 509.007485 828.207923 895.038092 841.417222 788.886661 483.665764 692.402872 227.875079 1778.738531 224.928337 235.076917 220.083131 335.853122 773.393352 530.067121 5795.457881 942.315732 132.548000 354.643471 776.553136 274.249817 986.087635 787.484329 223.794205 355.326303 778.523095 1559.394386
In [ ]:
from pyspark.sql import functions as F
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

def plot_clusters_on_map(df):
    df = df.to_spark()
    # Collect the data
    data = df.select('latitude', 'longitude', 'clusters').collect()
    
    # Separate the data into lists
    latitudes = [row['latitude'] for row in data]
    longitudes = [row['longitude'] for row in data]
    clusters = [row['clusters'] for row in data]

    # Get unique clusters and their range
    unique_clusters = sorted(set(clusters))
    min_cluster = min(unique_clusters)
    max_cluster = max(unique_clusters)

    # Create a custom colormap
    n_bins = 15  # Number of color bins
    colors = plt.cm.rainbow(np.linspace(0, 1, n_bins))
    cmap = LinearSegmentedColormap.from_list('custom_cmap', colors, N=n_bins)

    # Create the scatter plot with a square aspect ratio
    plt.figure(figsize=(12, 12))  # Equal width and height
    
    scatter = plt.scatter(longitudes, latitudes, c=clusters, cmap=cmap, 
                          alpha=0.6, edgecolors='none')

    # Add a color bar
    cbar = plt.colorbar(scatter)
    cbar.set_label('Cluster')

    # Set labels and title
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.title('Geographical Distribution of Clusters')

    # Set aspect ratio to 'equal' for a true square plot
    plt.gca().set_aspect('equal', adjustable='box')

    # Add grid lines
    plt.grid(True, linestyle='--', alpha=0.7)

    # Adjust plot limits to focus on the data
    x_range = max(longitudes) - min(longitudes)+ 5
    y_range = max(latitudes) - min(latitudes) + 5
    max_range = max(x_range, y_range)
    x_center = (max(longitudes) + min(longitudes)) / 2
    y_center = (max(latitudes) + min(latitudes)) / 2
    
    plt.xlim(x_center - max_range/2, x_center + max_range/2)
    plt.ylim(y_center - max_range/2, y_center + max_range/2)

    # Improve the layout
    plt.tight_layout()

    # Display the plot
    display(plt.gcf())
    plt.close()

    print(f"Clusters range from {min_cluster} to {max_cluster}")

# Assuming 'clustered_df' is your DataFrame with clusters, latitude, and longitude
plot_clusters_on_map(clustered_df)

print("Cluster map has been displayed.")
No description has been provided for this image
Clusters range from 0 to 68
Cluster map has been displayed.
In [ ]:
import pyspark.pandas as ps
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import colorsys
import numpy as np

def generate_colors(n):
    HSV_tuples = [(x * 1.0 / n, 0.5, 0.5) for x in range(n)]
    return list(map(lambda x: f'rgb{tuple(round(i * 255) for i in colorsys.hsv_to_rgb(*x))}', HSV_tuples))

def calculate_zoom(x1, x2, y1, y2):
    max_bound = max(abs(x1-x2), abs(y1-y2)) * 111
    return 11.5 - np.log(max_bound)

def plot_clusters_on_map(df):
    # Ensure df is a pandas-on-Spark DataFrame
    if not isinstance(df, ps.DataFrame):
        df = ps.DataFrame(df)
    
    # Convert to numpy arrays
    latitudes = df['latitude'].to_numpy()
    longitudes = df['longitude'].to_numpy()
    predictions = df['clusters'].to_numpy()
    
    # Get unique clusters
    unique_clusters = np.unique(predictions)
    
    # Create a colorscale with enough unique colors
    colorscale = generate_colors(len(unique_clusters))
    
    # Calculate bounds and zoom
    x1, x2 = np.min(longitudes), np.max(longitudes)
    y1, y2 = np.min(latitudes), np.max(latitudes)
    center_lon = (x1 + x2) / 2
    center_lat = (y1 + y2) / 2
    zoom = calculate_zoom(x1, x2, y1, y2)
    
    # Create subplot with two columns
    fig = make_subplots(rows=1, cols=2, column_widths=[0.8, 0.2], 
                        specs=[[{"type": "scattermapbox"}, {"type": "scatter"}]])
    
    # Calculate the maximum prediction value for size reference
    max_prediction = np.max(predictions)
    
    # Add scatter mapbox trace for each cluster
    for cluster, color in zip(unique_clusters, colorscale):
        cluster_mask = predictions == cluster
        cluster_lats = latitudes[cluster_mask]
        cluster_lons = longitudes[cluster_mask]
        
        if len(cluster_lats) > 0:
            fig.add_trace(
                go.Scattermapbox(
                    lat=cluster_lats,
                    lon=cluster_lons,
                    mode='markers',
                    marker=dict(
                        size=8,
                        color=color,
                        opacity=0.7,
                        sizemin=3,
                        sizemode='area',
                        sizeref=2.*max_prediction/10.**2
                    ),
                    text=predictions[cluster_mask],
                    hoverinfo='text',
                    name=f'Cluster {cluster}',
                    showlegend=False
                ),
                row=1, col=1
            )
    
    # Add traces for the custom legend
    for cluster, color in zip(unique_clusters, colorscale):
        fig.add_trace(
            go.Scatter(
                x=[None], y=[None],
                mode='markers',
                marker=dict(size=10, color=color),
                showlegend=True,
                name=f'Cluster {cluster}'
            ),
            row=1, col=2
        )
    
    # Update layout with calculated center and zoom, and zoom constraints
    fig.update_layout(
        mapbox=dict(
            style="open-street-map",
            center=dict(lat=center_lat, lon=center_lon),
            zoom=zoom        
        ),
        showlegend=True,
        legend=dict(
            itemsizing='constant',
            title='Clusters',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='rgba(0,0,0,0.5)',
            borderwidth=1
        ),
        width=1200,
        height=800,
        hovermode='closest'
    )
    
    # Update the second subplot (legend) layout
    fig.update_xaxes(visible=False, row=1, col=2)
    fig.update_yaxes(visible=False, row=1, col=2)
    
    # Display the plot
    fig.show()

    # Print cluster range
    min_cluster = np.min(predictions)
    max_cluster = np.max(predictions)
    print(f"Clusters range from {min_cluster} to {max_cluster}")

# Assuming 'clustered_df' is your pandas-on-Spark DataFrame with clusters, latitude, and longitude
plot_clusters_on_map(clustered_df)

print("Interactive cluster map has been displayed.")
Clusters range from 0 to 68
Interactive cluster map has been displayed.